dcnum 0.23.2-py3-none-any.whl → 0.25.1-py3-none-any.whl
This diff compares the contents of two publicly released versions of this package as they appear in their respective public registries; it is provided for informational purposes only.
- dcnum/_version.py +2 -2
- dcnum/feat/event_extractor_manager_thread.py +6 -5
- dcnum/feat/feat_background/base.py +24 -9
- dcnum/feat/feat_background/bg_sparse_median.py +56 -30
- dcnum/logic/ctrl.py +94 -43
- dcnum/meta/ppid.py +4 -3
- dcnum/read/__init__.py +1 -0
- dcnum/read/cache.py +4 -3
- dcnum/read/detect_flicker.py +44 -0
- dcnum/read/hdf5_data.py +138 -70
- dcnum/read/mapped.py +15 -2
- dcnum/segm/segm_torch/segm_torch_mpo.py +4 -1
- dcnum/write/__init__.py +1 -1
- dcnum/write/queue_collector_thread.py +7 -14
- dcnum/write/writer.py +149 -36
- {dcnum-0.23.2.dist-info → dcnum-0.25.1.dist-info}/METADATA +1 -1
- {dcnum-0.23.2.dist-info → dcnum-0.25.1.dist-info}/RECORD +20 -19
- {dcnum-0.23.2.dist-info → dcnum-0.25.1.dist-info}/WHEEL +1 -1
- {dcnum-0.23.2.dist-info → dcnum-0.25.1.dist-info}/LICENSE +0 -0
- {dcnum-0.23.2.dist-info → dcnum-0.25.1.dist-info}/top_level.txt +0 -0
dcnum/_version.py
CHANGED

dcnum/feat/event_extractor_manager_thread.py
CHANGED

@@ -96,12 +96,13 @@ class EventExtractorManagerThread(threading.Thread):
             # If the writer_dq starts filling up, then this could lead to
             # an oom-kill signal. Stall for the writer to prevent this.
             if (ldq := len(self.writer_dq)) > 1000:
-
-
-
-
+                stalled_sec = 0.
+                for ii in range(60):
+                    if len(self.writer_dq) > 200:
+                        time.sleep(.5)
+                        stalled_sec += .5
                 self.logger.warning(
-                    f"Stalled {
+                    f"Stalled {stalled_sec:.1f}s due to slow writer "
                     f"({ldq} chunks queued)")

             unavailable_slots = 0
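The new stall logic applies simple backpressure: once more than 1000 chunks are queued, the thread polls up to 60 times, sleeping in 0.5 s steps while the queue stays above 200 entries, then logs the accumulated stall time. A minimal standalone sketch of the pattern (the function and its thresholds are hypothetical, not the dcnum API):

    import collections
    import time

    def stall_for_writer(dq, high=1000, low=200, max_polls=60):
        # Backpressure sketch: only stall when the queue is clearly
        # overfull, then wait in small steps while it drains.
        stalled_sec = 0.
        if len(dq) > high:
            for _ in range(max_polls):
                if len(dq) > low:
                    time.sleep(.5)
                    stalled_sec += .5
        return stalled_sec

    writer_dq = collections.deque(range(50))  # hypothetical chunk queue
    print(stall_for_writer(writer_dq))  # 0.0, the queue is short enough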
dcnum/feat/feat_background/base.py
CHANGED

@@ -1,8 +1,10 @@
 import abc
 import functools
 import inspect
+import logging
 import multiprocessing as mp
 import pathlib
+import time

 import h5py

@@ -41,8 +43,11 @@ class Background(abc.ABC):
         kwargs:
             Additional keyword arguments passed to the subclass.
         """
+        self.logger = logging.getLogger(
+            f"dcnum.feat.feat_background.{self.__class__.__name__}")
         # proper conversion to Path objects
         output_path = pathlib.Path(output_path)
+        self.output_path = output_path
         if isinstance(input_data, str):
             input_data = pathlib.Path(input_data)
         # kwargs checks
@@ -188,20 +193,30 @@ class Background(abc.ABC):
         return self.image_proc.value

     def process(self):
+        """Perform the background computation
+
+        This irreversibly removes/overrides any "image_bg" and
+        "bg_off" features defined in the output file `self.h5out`.
+        """
+        t0 = time.perf_counter()
         # Delete any old background data
-        for
-
-
+        for ds_key in ["image_bg", "bg_off"]:
+            for grp_key in ["events", "basin_events"]:
+                if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                    del self.h5out[grp_key][ds_key]
         # Perform the actual background computation
        self.process_approach()
         bg_ppid = self.get_ppid()
         # Store pipeline information in the image_bg/bg_off feature
-        for
-
-            self.h5out
-
-
-
+        for ds_key in ["image_bg", "bg_off"]:
+            for grp_key in ["events", "basin_events"]:
+                if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                    self.h5out[f"{grp_key}/{ds_key}"].attrs[
+                        "dcnum ppid background"] = bg_ppid
+                    self.h5out[F"{grp_key}/{ds_key}"].attrs[
+                        "dcnum ppid generation"] = ppid.DCNUM_PPID_GENERATION
+        self.logger.info(
+            f"Background computation time: {time.perf_counter()-t0:.1f}s")

     @abc.abstractmethod
     def process_approach(self):
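`process()` now documents its destructive behavior, tags every stored background feature with the pipeline identifier and generation, and logs the elapsed time. A short sketch of how these attributes could be inspected in a finished output file with h5py (the file name is hypothetical; the dataset and attribute names are the ones written above):

    import h5py

    with h5py.File("output.rtdc", "r") as h5:  # hypothetical output file
        for path in ["events/image_bg", "events/bg_off",
                     "basin_events/image_bg", "basin_events/bg_off"]:
            if path in h5:
                attrs = h5[path].attrs
                print(path,
                      attrs.get("dcnum ppid background"),
                      attrs.get("dcnum ppid generation"))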
dcnum/feat/feat_background/bg_sparse_median.py
CHANGED

@@ -1,4 +1,3 @@
-import logging
 import queue
 import time

@@ -9,14 +8,13 @@ from ...read import HDF5Data

 from .base import mp_spawn, Background

-logger = logging.getLogger(__name__)
-

 class BackgroundSparseMed(Background):
     def __init__(self, input_data, output_path, kernel_size=200,
                  split_time=1., thresh_cleansing=0, frac_cleansing=.8,
                  offset_correction=True,
-                 compress=True,
+                 compress=True,
+                 num_cpus=None):
         """Sparse median background correction with cleansing

         In contrast to the rolling median background correction,
@@ -61,7 +59,7 @@ class BackgroundSparseMed(Background):
         offset_correction: bool
             The sparse median background correction produces one median
             image for multiple input frames (BTW this also leads to very
-            efficient data storage with HDF5
+            efficient data storage with internal HDF5 basins). In
             case the input frames are subject to frame-by-frame brightness
             variations (e.g. flickering of the illumination source), it
             is useful to have an offset value per frame that can then be
@@ -79,6 +77,11 @@ class BackgroundSparseMed(Background):
         num_cpus: int
             Number of CPUs to use for median computation. Defaults to
             `multiprocessing.cpu_count()`.
+
+        .. versionchanged:: 0.23.5
+
+            The background image data are stored as an internal
+            mapped basin to reduce the output file size.
         """
         super(BackgroundSparseMed, self).__init__(
             input_data=input_data,
@@ -93,7 +96,7 @@ class BackgroundSparseMed(Background):
         )

         if kernel_size > len(self.input_data):
-            logger.warning(
+            self.logger.warning(
                 f"The kernel size {kernel_size} is too large for input data"
                 f"size {len(self.input_data)}. Setting it to input data size!")
             kernel_size = len(self.input_data)
@@ -126,13 +129,14 @@ class BackgroundSparseMed(Background):
         else:
             # compute time using frame rate (approximate)
             dur = self.image_count / fr * 1.5
-            logger.info(
+            self.logger.info(
+                f"Approximating duration: {dur/60:.1f}min")
             self.time = np.linspace(0, dur, self.image_count,
                                     endpoint=True)
         if self.time is None:
             # No HDF5 file or no information therein; Make an educated guess.
             dur = self.image_count / 3600 * 1.5
-            logger.info(f"Guessing duration: {dur/60:.1f}min")
+            self.logger.info(f"Guessing duration: {dur/60:.1f}min")
             self.time = np.linspace(0, dur, self.image_count,
                                     endpoint=True)

@@ -222,7 +226,7 @@ class BackgroundSparseMed(Background):
         offset_correction: bool
             The sparse median background correction produces one median
             image for multiple input frames (BTW this also leads to very
-            efficient data storage with HDF5
+            efficient data storage with internal HDF5 basins). In
             case the input frames are subject to frame-by-frame brightness
             variations (e.g. flickering of the illumination source), it
             is useful to have an offset value per frame that can then be
@@ -301,18 +305,18 @@ class BackgroundSparseMed(Background):
             thresh = np.quantile(ref, self.frac_cleansing)
             used = ref <= thresh
             frac_remove = np.sum(~used) / used.size
-            logger.warning(
+            self.logger.warning(
                 f"{frac_remove_user:.1%} of the background images would "
                 f"be removed with the current settings, so we enforce "
                 f"`frac_cleansing`. To avoid this warning, try decreasing "
                 f"`thresh_cleansing` or `frac_cleansing`. The new "
                 f"threshold is {thresh_fact / thresh}.")

-            logger.info(f"Cleansed {frac_remove:.2%}")
+            self.logger.info(f"Cleansed {frac_remove:.2%}")
             step_times = self.step_times[used]
             bg_images = self.bg_images[used]
         else:
-            logger.info("Background series cleansing disabled")
+            self.logger.info("Background series cleansing disabled")
             step_times = self.step_times
             bg_images = self.bg_images

@@ -322,35 +326,55 @@ class BackgroundSparseMed(Background):
         idx1 = None
         for ii in range(len(step_times)):
             t1 = step_times[ii]
-            idx1 = np.argmin(np.abs(self.time - t1
+            idx1 = np.argmin(np.abs(self.time - t1 - self.split_time/2))
             bg_idx[idx0:idx1] = ii
             idx0 = idx1
         if idx1 is not None:
             # Fill up remainder of index array with last entry
             bg_idx[idx1:] = ii

-
-
-
-
-
-
-
-
-
-
-
+        # Store the background images as an internal mapped basin
+        self.writer.store_basin(
+            name="background images",
+            description=f"Pipeline identifier: {self.get_ppid()}",
+            mapping=bg_idx,
+            internal_data={"image_bg": bg_images}
+        )
+
+        # store the offset correction, if applicable
+        if self.offset_correction:
+            self.logger.info("Computing offset correction")
+            # compute the mean at the top of all background images
+            sh, sw = self.input_data.shape[1:]
+            roi_full = (slice(None), slice(0, 20), slice(0, sw))
+            bg_data_mean = np.mean(bg_images[roi_full], axis=(1, 2))
+            pos = 0
+            step = self.writer.get_best_nd_chunks(item_shape=(sh, sw),
+                                                  feat_dtype=np.uint8)[0]
+            bg_off = np.zeros(self.image_count, dtype=float)
+            # For every chunk in the input image data, compute that
+            # value as well and store the resulting offset value.
+            # TODO: Could this be parallelized, or are we limited in reading?
+            while pos < self.image_count:
+                stop = min(pos + step, self.image_count)
                 # Record background offset correction "bg_off". We take a
                 # slice of 20px from the top of the image (there are normally
                 # no events here, only the channel walls are visible).
-
-
+                cur_slice = slice(pos, stop)
+                # mean background brightness
+                val_bg = bg_data_mean[bg_idx[cur_slice]]
+                # mean image brightness
                 roi_cur = (cur_slice, slice(0, 20), slice(0, sw))
-            val_bg = np.mean(cur_bg_data[roi_full], axis=(1, 2))
                 val_dat = np.mean(self.input_data[roi_cur], axis=(1, 2))
                 # background image = image_bg + bg_off
-
-
+                bg_off[cur_slice] = val_dat - val_bg
+                # set progress
+                self.image_proc.value = 0.5 * (1 + pos / self.image_count)
+                pos = stop
+            # finally, store the background offset feature
+            self.writer.store_feature_chunk("bg_off", bg_off)
+
+        self.image_proc.value = 1

     def process_second(self,
                        ii: int,
@@ -393,7 +417,9 @@ class BackgroundSparseMed(Background):

         self.bg_images[ii] = self.shared_output.reshape(self.image_shape)

-        self.image_proc.value = idx_stop /
+        self.image_proc.value = idx_stop / (
+            # with offset correction, everything is slower
+            self.image_count * (1 + self.offset_correction))


 class WorkerSparseMed(mp_spawn.Process):
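The sparse median correction now stores one median image per time step plus a per-frame basin mapping and offset, instead of one full background image per frame. Following the `background image = image_bg + bg_off` relation from the code above, the effective background of any frame can be reconstructed from these three arrays; a small numpy sketch with hypothetical shapes:

    import numpy as np

    rng = np.random.default_rng(0)
    # hypothetical: 600 median images shared by 100000 frames
    image_bg = rng.integers(0, 255, (600, 80, 320)).astype(np.uint8)
    bg_idx = rng.integers(0, 600, 100000)  # mapping: frame -> median image
    bg_off = rng.normal(0., 2.5, 100000)   # per-frame brightness offset

    # effective background of frame i
    i = 42
    bg_i = image_bg[bg_idx[i]].astype(float) + bg_off[i]
    print(bg_i.shape)  # (80, 320)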
dcnum/logic/ctrl.py
CHANGED
@@ -34,6 +34,7 @@ from ..write import (
 from .job import DCNumPipelineJob
 from .json_encoder import ExtendedJSONEncoder

+
 # Force using "spawn" method for multiprocessing, because we are using
 # queues and threads and would end up with race conditions otherwise.
 mp_spawn = mp.get_context("spawn")
@@ -402,6 +403,12 @@ class DCNumJobRunner(threading.Thread):
             features=orig_feats,
             mapping=None)

+        # Handle basin data according to the user's request
+        self.state = "plumbing"
+        self.task_enforce_basin_strategy()
+
+        self.state = "cleanup"
+
         with HDF5Writer(self.path_temp_out) as hw:
             # pipeline metadata
             hw.h5.attrs["pipeline:dcnum generation"] = self.ppdict["gen_id"]
@@ -461,11 +468,7 @@ class DCNumJobRunner(threading.Thread):

             # copy metadata/logs/tables from original file
             with h5py.File(self.job["path_in"]) as h5_src:
-                copy_metadata(h5_src=h5_src,
-                              h5_dst=hw.h5,
-                              # Don't copy basins, we would have to index-map
-                              # them first.
-                              copy_basins=False)
+                copy_metadata(h5_src=h5_src, h5_dst=hw.h5)
             if redo_seg:
                 # Store the correct measurement identifier. This is used to
                 # identify this file as a correct basin in subsequent pipeline
@@ -489,12 +492,6 @@ class DCNumJobRunner(threading.Thread):
                 mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
                 hw.h5.attrs["experiment:run identifier"] = mid_new

-        # Handle basin data according to the user's request
-        self.state = "plumbing"
-        self.task_enforce_basin_strategy()
-
-        self.state = "cleanup"
-
         trun = datetime.timedelta(seconds=round(time.monotonic() - time_start))
         self.logger.info(f"Run duration: {str(trun)}")
         self.logger.info(time.strftime("Run stop: %Y-%m-%d-%H.%M.%S",
@@ -546,24 +543,19 @@ class DCNumJobRunner(threading.Thread):
         """
         self._progress_bn = 0
         t0 = time.perf_counter()
-        # We
-        #
-        #
-        #
-        #
-        # features
-
-
-
-
-
-
-
-        # 3. image features from the input file
-        [self.draw.h5, ["image", "image_bg", "bg_off"], "optional"],
-        ]
-        with h5py.File(self.path_temp_out, "a") as hout:
-            hw = HDF5Writer(hout)
+        # We have these points to consider:
+        # - We must use the `basinmap` feature to map from the original
+        #   file to the output file.
+        # - We must copy "bg_off" and "image_bg" to the output file.
+        # - For the "drain" basin strategy, we also have to copy all the
+        #   other features.
+        # - If "image_bg" is defined as an internal basin in the input
+        #   file, we have to convert the mapping and store a corresponding
+        #   internal basin in the output file.
+
+        # Determine the basinmap feature
+        with HDF5Writer(self.path_temp_out) as hw:
+            hout = hw.h5
             # First, we have to determine the basin mapping from input to
             # output. This information is stored by the QueueCollectorThread
             # in the "basinmap0" feature, ready to be used by us.
@@ -576,21 +568,22 @@ class DCNumJobRunner(threading.Thread):
             # mapping of the input file was set to slice(1, 100), then the
             # first image would not be there, and we would have
             # [1, 1, 1, ...].
-            idx_um = hout["events/index_unmapped"]
+            idx_um = hout["events/index_unmapped"][:]

             # If we want to convert this to an actual basinmap feature,
             # then we have to convert those indices to indices that map
             # to the original input HDF5 file.
             raw_im = self.draw.index_mapping
             if raw_im is None:
-                self.logger.info("Input file mapped with basinmap0")
                 # Create a hard link to save time and space
                 hout["events/basinmap0"] = hout["events/index_unmapped"]
-
+                basinmap0 = idx_um
             else:
-
+                self.logger.info("Converting input mapping")
+                basinmap0 = get_mapping_indices(raw_im)[idx_um]
                 # Store the mapped basin data in the output file.
-                hw.store_feature_chunk("basinmap0",
+                hw.store_feature_chunk("basinmap0", basinmap0)
+            self.logger.info("Input mapped to output with basinmap0")
             # We don't need them anymore.
             del hout["events/index_unmapped"]

@@ -598,19 +591,72 @@ class DCNumJobRunner(threading.Thread):
             # is the size of the raw dataset and the latter is its mapped
             # size!
             size_raw = self.draw.h5.attrs["experiment:event count"]
-            if (len(
-                    and np.all(
+            if (len(basinmap0) == size_raw
+                    and np.all(basinmap0 == np.arange(size_raw))):
                 # This means that the images in the input overlap perfectly
                 # with the images in the output, i.e. a "copy" segmenter
                 # was used or something is very reproducible.
                 # We set basinmap to None to be more efficient.
-
+                basinmap0 = None

             else:
                 # The input is identical to the output, because we are using
                 # the same pipeline identifier.
-
-
+                basinmap0 = None
+
+            # List of features we have to copy from input to output.
+            # We need to make sure that the features are correctly attributed
+            # from the input files. E.g. if the input file already has
+            # background images, but we recompute the background images, then
+            # we have to use the data from the recomputed background file.
+            # We achieve this by keeping a specific order and only copying
+            # those features that we don't already have in the output file.
+            feats_raw = [
+                # background data from the temporary input image
+                [self.dtin.h5, ["bg_off"], "critical"],
+                [self.draw.h5, self.draw.features_scalar_frame, "optional"],
+                [self.draw.h5, ["image", "bg_off"], "optional"],
+            ]
+
+            # Store image_bg as an internal basin, if defined in input
+            for idx in range(len(self.dtin.basins)):
+                bn_dict = self.dtin.basins[idx]
+                if (bn_dict["type"] == "internal"
+                        and "image_bg" in bn_dict["features"]):
+                    self.logger.info(
+                        "Copying internal basin background images")
+                    bn_grp, bn_feats, bn_map = self.dtin.get_basin_data(idx)
+                    assert "image_bg" in bn_feats
+                    # Load all images into memory (should only be ~600)
+                    bg_images1 = self.dtin.h5["basin_events"]["image_bg"][:]
+                    # Get the original internal mapping for these images
+                    # Note that `basinmap0` always refers to indices in the
+                    # original raw input file, and not to indices in an
+                    # optional mapped input file (using `index_mapping`).
+                    # Therefore, we do `self.dtin.h5["events"]["basinmap0"]`
+                    # instead of `self.dtin["basinmap0"]`
+                    basinmap_in = self.dtin.h5["events"][bn_dict["mapping"]][:]
+                    # Now we have to convert the indices in `basinmap_in`
+                    # to indices in the output file.
+                    basinmap1 = basinmap_in[basinmap0]
+                    # Store the internal mapping in the output file
+                    hw.store_basin(name=bn_dict["name"],
+                                   description=bn_dict["description"],
+                                   mapping=basinmap1,
+                                   internal_data={"image_bg": bg_images1}
+                                   )
+                    break
+            else:
+                self.logger.info("Background images must be copied")
+                # There is no internal image_bg feature, probably because
+                # the user did not use the sparsemed background correction.
+                # In this case, we simply add "image_bg" to the `feats_raw`.
+                feats_raw += [
+                    [self.dtin.h5, ["image_bg"], "critical"],
+                    [self.draw.h5, ["image_bg"], "optional"],
+                ]
+
+            # Copy the features required in the output file.
             for hin, feats, importance in feats_raw:
                 # Only consider features that are available in the input
                 # and that are not already in the output.
@@ -625,7 +671,7 @@ class DCNumJobRunner(threading.Thread):
                     copy_features(h5_src=hin,
                                   h5_dst=hout,
                                   features=feats,
-                                  mapping=
+                                  mapping=basinmap0)
                 else:
                     # TAP: Create basins for the "optional" features in the
                     # output file. Note that the "critical" features never
@@ -633,11 +679,17 @@ class DCNumJobRunner(threading.Thread):
                     self.logger.debug(f"Creating basin for {feats}")
                     # Relative and absolute paths.
                     pin = pathlib.Path(hin.filename).resolve()
+                    paths = [pin]
                     pout = pathlib.Path(hout.filename).resolve().parent
-
+                    try:
+                        paths.append(os.path.relpath(pin, pout))
+                    except ValueError:
+                        # This means it is impossible to compute a relative
+                        # path (e.g. different drive letter on Windows).
+                        pass
                     hw.store_basin(name="dcnum basin",
                                    features=feats,
-                                   mapping=
+                                   mapping=basinmap0,
                                    paths=paths,
                                    description=f"Created with dcnum {version}",
                                    )
@@ -730,7 +782,6 @@ class DCNumJobRunner(threading.Thread):

         # Start the data collection thread
         thr_coll = QueueCollectorThread(
-            data=self.dtin,
             event_queue=fe_kwargs["event_queue"],
             writer_dq=writer_dq,
             feat_nevents=fe_kwargs["feat_nevents"],
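At the heart of the rewritten plumbing code in `task_enforce_basin_strategy` is index composition: `index_unmapped` holds indices into the (possibly mapped) input, and `get_mapping_indices` expands the input's `index_mapping` so that the two can be chained into indices of the original raw file. A toy sketch of the composition, reusing the `slice(1, 100)` example from the comments above (all arrays here are hypothetical):

    import numpy as np

    # hypothetical: input opened with index_mapping=slice(1, 100), so the
    # mapped input consists of raw frames 1..99
    raw_indices = np.arange(1, 100)        # expanded index_mapping
    idx_um = np.array([0, 0, 1, 2, 2, 2])  # indices into the mapped input

    # composition yields indices into the original raw file
    basinmap0 = raw_indices[idx_um]
    print(basinmap0)  # [1 1 2 3 3 3]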
dcnum/meta/ppid.py
CHANGED
@@ -7,10 +7,11 @@ import pathlib
 from typing import Dict, List, Protocol
 import warnings

+import numpy as np

 #: Increment this string if there are breaking changes that make
 #: previous pipelines unreproducible.
-DCNUM_PPID_GENERATION = "
+DCNUM_PPID_GENERATION = "11"


 class ClassWithPPIDCapabilities(Protocol):
@@ -140,9 +141,9 @@ def kwargs_to_ppid(cls: ClassWithPPIDCapabilities,
             path = pathlib.Path(val)
             if path.exists():
                 val = path.name
-        if isinstance(val, bool):
+        if isinstance(val, (bool, np.bool_)):
             val = int(val)  # do not print e.g. "True"
-        elif isinstance(val, float):
+        elif isinstance(val, (float, np.floating)):
             if val == int(val):
                 val = int(val)  # omit the ".0" at the end
         concat_strings.append(f"{abr}={val}")
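The widened `isinstance` checks make numpy scalars serialize like their Python counterparts in pipeline identifiers. A standalone sketch of just this normalization step (`_norm` is a hypothetical helper mirroring the lines above, not part of dcnum):

    import numpy as np

    def _norm(val):
        if isinstance(val, (bool, np.bool_)):
            val = int(val)  # do not print e.g. "True"
        elif isinstance(val, (float, np.floating)):
            if val == int(val):
                val = int(val)  # omit the ".0" at the end
        return val

    assert _norm(np.bool_(True)) == 1
    assert _norm(np.float64(3.0)) == 3
    assert _norm(2.5) == 2.5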
dcnum/read/__init__.py
CHANGED
@@ -1,5 +1,6 @@
 # flake8: noqa: F401
 from .cache import md5sum
 from .const import PROTECTED_FEATURES
+from .detect_flicker import detect_flickering
 from .hdf5_data import HDF5Data, HDF5ImageCache, concatenated_hdf5_data
 from .mapped import get_mapping_indices, get_mapped_object
dcnum/read/cache.py
CHANGED
@@ -36,9 +36,10 @@ class BaseImageChunkCache(abc.ABC):
     def __getitem__(self, index):
         if isinstance(index, (slice, list, np.ndarray)):
             if isinstance(index, slice):
-                indices = np.arange(
-
-
+                indices = np.arange(
+                    index.start or 0,
+                    min(index.stop, len(self)) if index.stop else len(self),
+                    index.step)
             else:
                 indices = index
             array_out = np.empty((len(indices),) + self.image_shape,
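The rebuilt `np.arange` call normalizes a slice into explicit indices while clamping the stop value to the cache length. A quick sketch of the clamping behavior, with `n` standing in for `len(self)`:

    import numpy as np

    n = 10  # hypothetical cache length
    index = slice(2, 50, None)  # stop exceeds the cache length
    indices = np.arange(index.start or 0,
                        min(index.stop, n) if index.stop else n,
                        index.step)
    print(indices)  # [2 3 4 5 6 7 8 9]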
dcnum/read/detect_flicker.py
ADDED

@@ -0,0 +1,44 @@
+import numpy as np
+
+from .hdf5_data import HDF5Data
+
+
+def detect_flickering(image_data: np.ndarray | HDF5Data,
+                      roi_height: int = 10,
+                      brightness_threshold: float = 2.5,
+                      count_threshold: int = 5,
+                      max_frames: int = 1000):
+    """Determine whether an image series experiences flickering
+
+    Flickering is an unwelcome phenomenon due to a faulty data
+    acquisition device. For instance, if there is random voltage noise in
+    the electronics managing the LED power, then the brightness of the
+    LED will vary randomly when the noise signal overlaps with the flash
+    triggering signal.
+
+    If flickering is detected, you should use the "sparsemed" background
+    computation with `offset_correction` set to True.
+
+    Parameters
+    ----------
+    image_data:
+        sliceable object (e.g. numpy array or HDF5Data) containing
+        image data
+    roi_height: int
+        height of the ROI in pixels for which to search for flickering;
+        the entire width of the image is used
+    brightness_threshold: float
+        brightness difference between an individual ROI's median and the
+        median of all ROI medians leading to a positive flickering event
+    count_threshold: int
+        minimum number of flickering events that would lead to a positive
+        flickering decision
+    max_frames: int
+        maximum number of frames to include in the flickering analysis
+    """
+    # slice event axis first in case we have an HDF5Data instance
+    roi_data = image_data[:max_frames][:, :roi_height, :]
+    roi_median = np.median(roi_data, axis=(1, 2))
+    roi_offset = roi_median - np.median(roi_median)
+    flickering_events = np.sum(np.abs(roi_offset) >= abs(brightness_threshold))
+    return flickering_events >= count_threshold
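A usage sketch for the new function with a synthetic image series (the shapes and the injected brightness jump are hypothetical; the thresholds are the defaults defined above):

    import numpy as np

    from dcnum.read import detect_flickering

    # 500 frames of 80x320 images; every 50th frame is brighter by 10
    # gray levels, yielding 10 flickering events at the default thresholds
    images = np.full((500, 80, 320), 128, dtype=np.uint8)
    images[::50] += 10

    if detect_flickering(images):
        print("flickering detected: use sparsemed with "
              "offset_correction=True")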