dcnum-0.23.1-py3-none-any.whl → dcnum-0.25.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcnum/_version.py +2 -2
- dcnum/feat/event_extractor_manager_thread.py +6 -5
- dcnum/feat/feat_background/base.py +24 -9
- dcnum/feat/feat_background/bg_sparse_median.py +56 -30
- dcnum/logic/ctrl.py +118 -43
- dcnum/logic/job.py +22 -0
- dcnum/meta/ppid.py +4 -3
- dcnum/read/__init__.py +1 -0
- dcnum/read/cache.py +4 -3
- dcnum/read/detect_flicker.py +44 -0
- dcnum/read/hdf5_data.py +138 -70
- dcnum/read/mapped.py +15 -2
- dcnum/segm/segm_torch/__init__.py +8 -4
- dcnum/segm/segm_torch/segm_torch_mpo.py +4 -1
- dcnum/write/__init__.py +1 -1
- dcnum/write/queue_collector_thread.py +7 -14
- dcnum/write/writer.py +149 -36
- {dcnum-0.23.1.dist-info → dcnum-0.25.1.dist-info}/METADATA +2 -2
- {dcnum-0.23.1.dist-info → dcnum-0.25.1.dist-info}/RECORD +22 -21
- {dcnum-0.23.1.dist-info → dcnum-0.25.1.dist-info}/WHEEL +1 -1
- {dcnum-0.23.1.dist-info → dcnum-0.25.1.dist-info}/LICENSE +0 -0
- {dcnum-0.23.1.dist-info → dcnum-0.25.1.dist-info}/top_level.txt +0 -0
dcnum/_version.py
CHANGED

dcnum/feat/event_extractor_manager_thread.py
CHANGED

@@ -96,12 +96,13 @@ class EventExtractorManagerThread(threading.Thread):
         # If the writer_dq starts filling up, then this could lead to
         # an oom-kill signal. Stall for the writer to prevent this.
         if (ldq := len(self.writer_dq)) > 1000:
-
-
-
-
+            stalled_sec = 0.
+            for ii in range(60):
+                if len(self.writer_dq) > 200:
+                    time.sleep(.5)
+                    stalled_sec += .5
             self.logger.warning(
-                f"Stalled {
+                f"Stalled {stalled_sec:.1f}s due to slow writer "
                 f"({ldq} chunks queued)")

         unavailable_slots = 0
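The change above replaces a single fixed stall with an incremental wait loop that keeps polling the writer queue. A minimal, self-contained sketch of this backpressure pattern (names and thresholds mirror the hunk, but this is a sketch, not the dcnum API):

    import collections
    import time

    writer_dq = collections.deque()  # filled by extractors, drained by a writer

    def stall_for_writer(dq, high=1000, low=200, tick=0.5, max_ticks=60):
        """Block the producer until the writer catches up or we time out."""
        if len(dq) > high:
            stalled_sec = 0.
            for _ in range(max_ticks):
                if len(dq) > low:
                    time.sleep(tick)
                    stalled_sec += tick
                else:
                    break
            print(f"Stalled {stalled_sec:.1f}s due to slow writer "
                  f"({len(dq)} chunks queued)")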
dcnum/feat/feat_background/base.py
CHANGED

@@ -1,8 +1,10 @@
 import abc
 import functools
 import inspect
+import logging
 import multiprocessing as mp
 import pathlib
+import time

 import h5py

@@ -41,8 +43,11 @@ class Background(abc.ABC):
         kwargs:
             Additional keyword arguments passed to the subclass.
         """
+        self.logger = logging.getLogger(
+            f"dcnum.feat.feat_background.{self.__class__.__name__}")
         # proper conversion to Path objects
         output_path = pathlib.Path(output_path)
+        self.output_path = output_path
         if isinstance(input_data, str):
             input_data = pathlib.Path(input_data)
         # kwargs checks
@@ -188,20 +193,30 @@
         return self.image_proc.value

     def process(self):
+        """Perform the background computation
+
+        This irreversibly removes/overrides any "image_bg" and
+        "bg_off" features defined in the output file `self.h5out`.
+        """
+        t0 = time.perf_counter()
         # Delete any old background data
-        for
-
-
+        for ds_key in ["image_bg", "bg_off"]:
+            for grp_key in ["events", "basin_events"]:
+                if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                    del self.h5out[grp_key][ds_key]
         # Perform the actual background computation
         self.process_approach()
         bg_ppid = self.get_ppid()
         # Store pipeline information in the image_bg/bg_off feature
-        for
-
-        self.h5out
-
-
-
+        for ds_key in ["image_bg", "bg_off"]:
+            for grp_key in ["events", "basin_events"]:
+                if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                    self.h5out[f"{grp_key}/{ds_key}"].attrs[
+                        "dcnum ppid background"] = bg_ppid
+                    self.h5out[F"{grp_key}/{ds_key}"].attrs[
+                        "dcnum ppid generation"] = ppid.DCNUM_PPID_GENERATION
+        self.logger.info(
+            f"Background computation time: {time.perf_counter()-t0:.1f}s")

     @abc.abstractmethod
     def process_approach(self):
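The new `process()` body boils down to standard h5py operations: conditionally deleting stale datasets and tagging the recomputed ones with provenance attributes. A standalone sketch of those operations (file name, data shape and ppid string are placeholders for illustration):

    import h5py
    import numpy as np

    with h5py.File("out.h5", "a") as h5:
        events = h5.require_group("events")
        # delete any old background data
        if "image_bg" in events:
            del events["image_bg"]
        # recompute and store, then record how it was computed
        ds = events.create_dataset(
            "image_bg", data=np.zeros((5, 80, 320), dtype=np.uint8))
        ds.attrs["dcnum ppid background"] = "<pipeline identifier>"
        ds.attrs["dcnum ppid generation"] = "11"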
dcnum/feat/feat_background/bg_sparse_median.py
CHANGED

@@ -1,4 +1,3 @@
-import logging
 import queue
 import time

@@ -9,14 +8,13 @@ from ...read import HDF5Data

 from .base import mp_spawn, Background

-logger = logging.getLogger(__name__)
-

 class BackgroundSparseMed(Background):
     def __init__(self, input_data, output_path, kernel_size=200,
                  split_time=1., thresh_cleansing=0, frac_cleansing=.8,
                  offset_correction=True,
-                 compress=True,
+                 compress=True,
+                 num_cpus=None):
         """Sparse median background correction with cleansing

         In contrast to the rolling median background correction,
@@ -61,7 +59,7 @@ class BackgroundSparseMed(Background):
         offset_correction: bool
             The sparse median background correction produces one median
             image for multiple input frames (BTW this also leads to very
-            efficient data storage with HDF5
+            efficient data storage with internal HDF5 basins). In
             case the input frames are subject to frame-by-frame brightness
             variations (e.g. flickering of the illumination source), it
             is useful to have an offset value per frame that can then be
@@ -79,6 +77,11 @@ class BackgroundSparseMed(Background):
         num_cpus: int
             Number of CPUs to use for median computation. Defaults to
             `multiprocessing.cpu_count()`.
+
+        .. versionchanged:: 0.23.5
+
+            The background image data are stored as an internal
+            mapped basin to reduce the output file size.
         """
         super(BackgroundSparseMed, self).__init__(
             input_data=input_data,
@@ -93,7 +96,7 @@ class BackgroundSparseMed(Background):
         )

         if kernel_size > len(self.input_data):
-            logger.warning(
+            self.logger.warning(
                 f"The kernel size {kernel_size} is too large for input data"
                 f"size {len(self.input_data)}. Setting it to input data size!")
             kernel_size = len(self.input_data)
@@ -126,13 +129,14 @@ class BackgroundSparseMed(Background):
             else:
                 # compute time using frame rate (approximate)
                 dur = self.image_count / fr * 1.5
-                logger.info(
+                self.logger.info(
+                    f"Approximating duration: {dur/60:.1f}min")
                 self.time = np.linspace(0, dur, self.image_count,
                                         endpoint=True)
         if self.time is None:
             # No HDF5 file or no information therein; Make an educated guess.
             dur = self.image_count / 3600 * 1.5
-            logger.info(f"Guessing duration: {dur/60:.1f}min")
+            self.logger.info(f"Guessing duration: {dur/60:.1f}min")
             self.time = np.linspace(0, dur, self.image_count,
                                     endpoint=True)

@@ -222,7 +226,7 @@ class BackgroundSparseMed(Background):
         offset_correction: bool
             The sparse median background correction produces one median
             image for multiple input frames (BTW this also leads to very
-            efficient data storage with HDF5
+            efficient data storage with internal HDF5 basins). In
             case the input frames are subject to frame-by-frame brightness
             variations (e.g. flickering of the illumination source), it
             is useful to have an offset value per frame that can then be
@@ -301,18 +305,18 @@ class BackgroundSparseMed(Background):
             thresh = np.quantile(ref, self.frac_cleansing)
             used = ref <= thresh
             frac_remove = np.sum(~used) / used.size
-            logger.warning(
+            self.logger.warning(
                 f"{frac_remove_user:.1%} of the background images would "
                 f"be removed with the current settings, so we enforce "
                 f"`frac_cleansing`. To avoid this warning, try decreasing "
                 f"`thresh_cleansing` or `frac_cleansing`. The new "
                 f"threshold is {thresh_fact / thresh}.")

-            logger.info(f"Cleansed {frac_remove:.2%}")
+            self.logger.info(f"Cleansed {frac_remove:.2%}")
             step_times = self.step_times[used]
             bg_images = self.bg_images[used]
         else:
-            logger.info("Background series cleansing disabled")
+            self.logger.info("Background series cleansing disabled")
             step_times = self.step_times
             bg_images = self.bg_images

@@ -322,35 +326,55 @@ class BackgroundSparseMed(Background):
         idx1 = None
         for ii in range(len(step_times)):
             t1 = step_times[ii]
-            idx1 = np.argmin(np.abs(self.time - t1
+            idx1 = np.argmin(np.abs(self.time - t1 - self.split_time/2))
             bg_idx[idx0:idx1] = ii
             idx0 = idx1
         if idx1 is not None:
             # Fill up remainder of index array with last entry
             bg_idx[idx1:] = ii

-
-
-
-
-
-
-
-
-
-
-
+        # Store the background images as an internal mapped basin
+        self.writer.store_basin(
+            name="background images",
+            description=f"Pipeline identifier: {self.get_ppid()}",
+            mapping=bg_idx,
+            internal_data={"image_bg": bg_images}
+        )
+
+        # store the offset correction, if applicable
+        if self.offset_correction:
+            self.logger.info("Computing offset correction")
+            # compute the mean at the top of all background images
+            sh, sw = self.input_data.shape[1:]
+            roi_full = (slice(None), slice(0, 20), slice(0, sw))
+            bg_data_mean = np.mean(bg_images[roi_full], axis=(1, 2))
+            pos = 0
+            step = self.writer.get_best_nd_chunks(item_shape=(sh, sw),
+                                                  feat_dtype=np.uint8)[0]
+            bg_off = np.zeros(self.image_count, dtype=float)
+            # For every chunk in the input image data, compute that
+            # value as well and store the resulting offset value.
+            # TODO: Could this be parallelized, or are we limited in reading?
+            while pos < self.image_count:
+                stop = min(pos + step, self.image_count)
                 # Record background offset correction "bg_off". We take a
                 # slice of 20px from the top of the image (there are normally
                 # no events here, only the channel walls are visible).
-
-
+                cur_slice = slice(pos, stop)
+                # mean background brightness
+                val_bg = bg_data_mean[bg_idx[cur_slice]]
+                # mean image brightness
                 roi_cur = (cur_slice, slice(0, 20), slice(0, sw))
-                val_bg = np.mean(cur_bg_data[roi_full], axis=(1, 2))
                 val_dat = np.mean(self.input_data[roi_cur], axis=(1, 2))
                 # background image = image_bg + bg_off
-
-
+                bg_off[cur_slice] = val_dat - val_bg
+                # set progress
+                self.image_proc.value = 0.5 * (1 + pos / self.image_count)
+                pos = stop
+            # finally, store the background offset feature
+            self.writer.store_feature_chunk("bg_off", bg_off)
+
+        self.image_proc.value = 1

     def process_second(self,
                        ii: int,
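The offset correction added above compares the mean brightness of a 20-pixel strip at the top of each frame with the same strip in the frame's assigned median background. A reduced numpy sketch of that computation (synthetic data, not the dcnum implementation):

    import numpy as np

    rng = np.random.default_rng(42)
    images = rng.integers(100, 120, size=(1000, 80, 320)).astype(np.uint8)
    bg_images = rng.integers(100, 120, size=(6, 80, 320)).astype(np.uint8)
    bg_idx = np.repeat(np.arange(6), 1000 // 6 + 1)[:1000]  # frame -> median image

    strip = (slice(None), slice(0, 20), slice(None))
    bg_mean = bg_images[strip].mean(axis=(1, 2))   # one value per median image
    dat_mean = images[strip].mean(axis=(1, 2))     # one value per frame
    bg_off = dat_mean - bg_mean[bg_idx]            # per-frame brightness offset
    # corrected background for frame i: bg_images[bg_idx[i]] + bg_off[i]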
@@ -393,7 +417,9 @@ class BackgroundSparseMed(Background):

         self.bg_images[ii] = self.shared_output.reshape(self.image_shape)

-        self.image_proc.value = idx_stop /
+        self.image_proc.value = idx_stop / (
+            # with offset correction, everything is slower
+            self.image_count * (1 + self.offset_correction))


 class WorkerSparseMed(mp_spawn.Process):
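For context on why the median images are now stored as an internal mapped basin: only the handful of median images plus an integer index per frame are written, instead of one full background image per frame. Roughly, with illustrative numbers (uint8 images, the "~600" comes from the comment in the ctrl.py hunk below):

    import numpy as np

    n_frames, n_medians, h, w = 100_000, 600, 80, 320
    dense = n_frames * h * w                    # one image per frame
    mapped = n_medians * h * w + n_frames * 8   # median images + uint64 mapping
    print(f"dense: {dense / 1e9:.1f} GB, mapped basin: {mapped / 1e6:.0f} MB")

    # expanding back to per-frame backgrounds is plain fancy indexing:
    bg_idx = np.zeros(n_frames, dtype=np.uint64)
    bg_images = np.zeros((n_medians, h, w), dtype=np.uint8)
    image_bg = bg_images[bg_idx[:10]]  # backgrounds for the first ten frames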
dcnum/logic/ctrl.py
CHANGED
@@ -1,6 +1,7 @@
 import collections
 import datetime
 import hashlib
+import importlib
 import json
 import logging
 from logging.handlers import QueueListener
@@ -33,6 +34,7 @@ from ..write import (
 from .job import DCNumPipelineJob
 from .json_encoder import ExtendedJSONEncoder

+
 # Force using "spawn" method for multiprocessing, because we are using
 # queues and threads and would end up with race conditions otherwise.
 mp_spawn = mp.get_context("spawn")
@@ -401,6 +403,12 @@
                       features=orig_feats,
                       mapping=None)

+        # Handle basin data according to the user's request
+        self.state = "plumbing"
+        self.task_enforce_basin_strategy()
+
+        self.state = "cleanup"
+
         with HDF5Writer(self.path_temp_out) as hw:
             # pipeline metadata
             hw.h5.attrs["pipeline:dcnum generation"] = self.ppdict["gen_id"]
@@ -430,6 +438,16 @@
                     "build": ", ".join(platform.python_build()),
                     "implementation":
                         platform.python_implementation(),
+                    "libraries": get_library_versions_dict([
+                        "cv2",
+                        "h5py",
+                        "mahotas",
+                        "numba",
+                        "numpy",
+                        "scipy",
+                        "skimage",
+                        "torch",
+                    ]),
                     "version": platform.python_version(),
                 },
                 "system": {
@@ -450,11 +468,7 @@

             # copy metadata/logs/tables from original file
             with h5py.File(self.job["path_in"]) as h5_src:
-                copy_metadata(h5_src=h5_src,
-                              h5_dst=hw.h5,
-                              # Don't copy basins, we would have to index-map
-                              # them first.
-                              copy_basins=False)
+                copy_metadata(h5_src=h5_src, h5_dst=hw.h5)
             if redo_seg:
                 # Store the correct measurement identifier. This is used to
                 # identify this file as a correct basin in subsequent pipeline
@@ -478,12 +492,6 @@
                 mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
                 hw.h5.attrs["experiment:run identifier"] = mid_new

-        # Handle basin data according to the user's request
-        self.state = "plumbing"
-        self.task_enforce_basin_strategy()
-
-        self.state = "cleanup"
-
         trun = datetime.timedelta(seconds=round(time.monotonic() - time_start))
         self.logger.info(f"Run duration: {str(trun)}")
         self.logger.info(time.strftime("Run stop: %Y-%m-%d-%H.%M.%S",
@@ -535,24 +543,19 @@
         """
         self._progress_bn = 0
         t0 = time.perf_counter()
-        # We
-        #
-        #
-        #
-        #
-        # features
-
-
-
-
-
-
-
-        # 3. image features from the input file
-        [self.draw.h5, ["image", "image_bg", "bg_off"], "optional"],
-        ]
-        with h5py.File(self.path_temp_out, "a") as hout:
-            hw = HDF5Writer(hout)
+        # We have these points to consider:
+        # - We must use the `basinmap` feature to map from the original
+        #   file to the output file.
+        # - We must copy "bg_off" and "image_bg" to the output file.
+        # - For the "drain" basin strategy, we also have to copy all the
+        #   other features.
+        # - If "image_bg" is defined as an internal basin in the input
+        #   file, we have to convert the mapping and store a corresponding
+        #   internal basin in the output file.
+
+        # Determine the basinmap feature
+        with HDF5Writer(self.path_temp_out) as hw:
+            hout = hw.h5
             # First, we have to determine the basin mapping from input to
             # output. This information is stored by the QueueCollectorThread
             # in the "basinmap0" feature, ready to be used by us.
@@ -565,21 +568,22 @@
             # mapping of the input file was set to slice(1, 100), then the
             # first image would not be there, and we would have
             # [1, 1, 1, ...].
-            idx_um = hout["events/index_unmapped"]
+            idx_um = hout["events/index_unmapped"][:]

             # If we want to convert this to an actual basinmap feature,
             # then we have to convert those indices to indices that map
             # to the original input HDF5 file.
             raw_im = self.draw.index_mapping
             if raw_im is None:
-                self.logger.info("Input file mapped with basinmap0")
                 # Create a hard link to save time and space
                 hout["events/basinmap0"] = hout["events/index_unmapped"]
-
+                basinmap0 = idx_um
             else:
-
+                self.logger.info("Converting input mapping")
+                basinmap0 = get_mapping_indices(raw_im)[idx_um]
                 # Store the mapped basin data in the output file.
-                hw.store_feature_chunk("basinmap0",
+                hw.store_feature_chunk("basinmap0", basinmap0)
+                self.logger.info("Input mapped to output with basinmap0")
             # We don't need them anymore.
             del hout["events/index_unmapped"]

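The hard link above is plain h5py behavior: assigning an existing dataset object to a new name creates a second reference to the same on-disk data, so no bytes are copied, and deleting one name leaves the data reachable via the other. A standalone sketch (file name is a placeholder):

    import h5py
    import numpy as np

    with h5py.File("tmp.h5", "w") as h5:
        h5["events/index_unmapped"] = np.arange(100, dtype=np.uint64)
        # hard link: same dataset under a second name, no data copied
        h5["events/basinmap0"] = h5["events/index_unmapped"]
        # deleting one link does not delete the data
        del h5["events/index_unmapped"]
        assert h5["events/basinmap0"][0] == 0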
@@ -587,19 +591,72 @@
             # is the size of the raw dataset and the latter is its mapped
             # size!
             size_raw = self.draw.h5.attrs["experiment:event count"]
-            if (len(
-                    and np.all(
+            if (len(basinmap0) == size_raw
+                    and np.all(basinmap0 == np.arange(size_raw))):
                 # This means that the images in the input overlap perfectly
                 # with the images in the output, i.e. a "copy" segmenter
                 # was used or something is very reproducible.
                 # We set basinmap to None to be more efficient.
-
+                basinmap0 = None

             else:
                 # The input is identical to the output, because we are using
                 # the same pipeline identifier.
-
-
+                basinmap0 = None
+
+            # List of features we have to copy from input to output.
+            # We need to make sure that the features are correctly attributed
+            # from the input files. E.g. if the input file already has
+            # background images, but we recompute the background images, then
+            # we have to use the data from the recomputed background file.
+            # We achieve this by keeping a specific order and only copying
+            # those features that we don't already have in the output file.
+            feats_raw = [
+                # background data from the temporary input image
+                [self.dtin.h5, ["bg_off"], "critical"],
+                [self.draw.h5, self.draw.features_scalar_frame, "optional"],
+                [self.draw.h5, ["image", "bg_off"], "optional"],
+            ]
+
+            # Store image_bg as an internal basin, if defined in input
+            for idx in range(len(self.dtin.basins)):
+                bn_dict = self.dtin.basins[idx]
+                if (bn_dict["type"] == "internal"
+                        and "image_bg" in bn_dict["features"]):
+                    self.logger.info(
+                        "Copying internal basin background images")
+                    bn_grp, bn_feats, bn_map = self.dtin.get_basin_data(idx)
+                    assert "image_bg" in bn_feats
+                    # Load all images into memory (should only be ~600)
+                    bg_images1 = self.dtin.h5["basin_events"]["image_bg"][:]
+                    # Get the original internal mapping for these images
+                    # Note that `basinmap0` always refers to indices in the
+                    # original raw input file, and not to indices in an
+                    # optional mapped input file (using `index_mapping`).
+                    # Therefore, we do `self.dtin.h5["events"]["basinmap0"]`
+                    # instead of `self.dtin["basinmap0"]`
+                    basinmap_in = self.dtin.h5["events"][bn_dict["mapping"]][:]
+                    # Now we have to convert the indices in `basinmap_in`
+                    # to indices in the output file.
+                    basinmap1 = basinmap_in[basinmap0]
+                    # Store the internal mapping in the output file
+                    hw.store_basin(name=bn_dict["name"],
+                                   description=bn_dict["description"],
+                                   mapping=basinmap1,
+                                   internal_data={"image_bg": bg_images1}
+                                   )
+                    break
+            else:
+                self.logger.info("Background images must be copied")
+                # There is no internal image_bg feature, probably because
+                # the user did not use the sparsemed background correction.
+                # In this case, we simply add "image_bg" to the `feats_raw`.
+                feats_raw += [
+                    [self.dtin.h5, ["image_bg"], "critical"],
+                    [self.draw.h5, ["image_bg"], "optional"],
+                ]
+
+            # Copy the features required in the output file.
             for hin, feats, importance in feats_raw:
                 # Only consider features that are available in the input
                 # and that are not already in the output.
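The key operation in this hunk, `basinmap1 = basinmap_in[basinmap0]`, composes two index mappings via numpy fancy indexing: if `basinmap0[i]` is the raw-input index of output event `i`, and `basinmap_in[j]` is the median-image index of raw event `j`, the composition maps output events straight to median images. A toy example:

    import numpy as np

    basinmap_in = np.array([0, 0, 0, 1, 1, 2])  # raw frame -> background image
    basinmap0 = np.array([1, 3, 5])             # output event -> raw frame
    basinmap1 = basinmap_in[basinmap0]          # output event -> background image
    print(basinmap1)  # [0 1 2]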
@@ -614,7 +671,7 @@
                     copy_features(h5_src=hin,
                                   h5_dst=hout,
                                   features=feats,
-                                  mapping=
+                                  mapping=basinmap0)
                 else:
                     # TAP: Create basins for the "optional" features in the
                     # output file. Note that the "critical" features never
@@ -622,11 +679,17 @@
                     self.logger.debug(f"Creating basin for {feats}")
                     # Relative and absolute paths.
                     pin = pathlib.Path(hin.filename).resolve()
+                    paths = [pin]
                     pout = pathlib.Path(hout.filename).resolve().parent
-
+                    try:
+                        paths.append(os.path.relpath(pin, pout))
+                    except ValueError:
+                        # This means it is impossible to compute a relative
+                        # path (e.g. different drive letter on Windows).
+                        pass
                     hw.store_basin(name="dcnum basin",
                                    features=feats,
-                                   mapping=
+                                   mapping=basinmap0,
                                    paths=paths,
                                    description=f"Created with dcnum {version}",
                                    )
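`os.path.relpath` raises ValueError on Windows when source and target live on different drives, which is exactly what the try/except above guards against. For example:

    import os

    try:
        rel = os.path.relpath(r"C:\data\input.rtdc", start=r"D:\results")
    except ValueError:
        # no relative path exists across drive letters;
        # fall back to the absolute path only
        rel = None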
@@ -719,7 +782,6 @@

         # Start the data collection thread
         thr_coll = QueueCollectorThread(
-            data=self.dtin,
             event_queue=fe_kwargs["event_queue"],
             writer_dq=writer_dq,
             feat_nevents=fe_kwargs["feat_nevents"],
@@ -780,6 +842,19 @@
         self.logger.info("Finished segmentation and feature extraction")


+def get_library_versions_dict(library_name_list):
+    version_dict = {}
+    for library_name in library_name_list:
+        try:
+            lib = importlib.import_module(library_name)
+        except BaseException:
+            version = None
+        else:
+            version = lib.__version__
+        version_dict[library_name] = version
+    return version_dict
+
+
 def join_thread_helper(thr, timeout, retries, logger, name):
     for _ in range(retries):
         thr.join(timeout=timeout)
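The new helper swallows any import-time failure (hence `BaseException`, which also catches things like a SystemExit raised by a broken extension module) and records `None` for libraries that cannot be imported. Usage, with output depending on the environment:

    >>> get_library_versions_dict(["numpy", "torch"])
    {'numpy': '1.26.4', 'torch': None}  # e.g., torch not installed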
dcnum/logic/job.py
CHANGED
@@ -182,3 +182,25 @@ class DCNumPipelineJob:
         if len(ret) == 1:
             ret = ret[0]
         return ret
+
+    def validate(self):
+        """Make sure the pipeline will run given the job kwargs
+
+        Returns
+        -------
+        True:
+            for testing convenience
+
+        Raises
+        ------
+        dcnum.segm.SegmenterNotApplicableError:
+            the segmenter is incompatible with the input path
+        """
+        # Check segmenter applicability
+        seg_cls = get_available_segmenters()[self.kwargs["segmenter_code"]]
+        with HDF5Data(self.kwargs["path_in"]) as hd:
+            seg_cls.validate_applicability(
+                segmenter_kwargs=self.kwargs["segmenter_kwargs"],
+                logs=hd.logs,
+                meta=hd.meta)
+        return True
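A sketch of how the new hook could be called before launching a runner (path and segmenter code are hypothetical; the constructor keywords mirror the `self.kwargs` accesses above):

    job = DCNumPipelineJob(path_in="measurement.rtdc",
                           segmenter_code="thresh")
    job.validate()  # raises SegmenterNotApplicableError if incompatible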
dcnum/meta/ppid.py
CHANGED
@@ -7,10 +7,11 @@ import pathlib
 from typing import Dict, List, Protocol
 import warnings

+import numpy as np

 #: Increment this string if there are breaking changes that make
 #: previous pipelines unreproducible.
-DCNUM_PPID_GENERATION = "
+DCNUM_PPID_GENERATION = "11"


 class ClassWithPPIDCapabilities(Protocol):
@@ -140,9 +141,9 @@ def kwargs_to_ppid(cls: ClassWithPPIDCapabilities,
             path = pathlib.Path(val)
             if path.exists():
                 val = path.name
-        if isinstance(val, bool):
+        if isinstance(val, (bool, np.bool_)):
             val = int(val)  # do not print e.g. "True"
-        elif isinstance(val, float):
+        elif isinstance(val, (float, np.floating)):
             if val == int(val):
                 val = int(val)  # omit the ".0" at the end
         concat_strings.append(f"{abr}={val}")
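The widened isinstance checks matter because numpy scalars, which often come out of HDF5 attributes, are not always instances of the Python built-ins:

    import numpy as np

    isinstance(np.True_, bool)          # False: np.bool_ is not a bool subclass
    isinstance(np.float64(1.0), float)  # True (np.float64 subclasses float)
    isinstance(np.float32(1.0), float)  # False: only np.floating catches this
    int(np.True_)                       # 1, once detected via (bool, np.bool_)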
dcnum/read/__init__.py
CHANGED
@@ -1,5 +1,6 @@
 # flake8: noqa: F401
 from .cache import md5sum
 from .const import PROTECTED_FEATURES
+from .detect_flicker import detect_flickering
 from .hdf5_data import HDF5Data, HDF5ImageCache, concatenated_hdf5_data
 from .mapped import get_mapping_indices, get_mapped_object
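The body of the new `detect_flickering` helper is not shown in this diff (the new module dcnum/read/detect_flicker.py adds 44 lines). As a rough, hypothetical illustration of the idea, flagging frame-to-frame brightness variation of the illumination source, one could compare per-frame strip means against their spread (this sketch is not the dcnum implementation):

    import numpy as np

    def detect_flickering_sketch(images, thresh=1.0):
        """Hypothetical sketch, not the actual dcnum helper."""
        # mean brightness of a 20 px strip at the top of every frame
        strip_means = images[:, :20, :].mean(axis=(1, 2))
        # large frame-to-frame deviation suggests a flickering source
        return np.std(strip_means) > thresh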
dcnum/read/cache.py
CHANGED
@@ -36,9 +36,10 @@ class BaseImageChunkCache(abc.ABC):
     def __getitem__(self, index):
         if isinstance(index, (slice, list, np.ndarray)):
             if isinstance(index, slice):
-                indices = np.arange(
-
-
+                indices = np.arange(
+                    index.start or 0,
+                    min(index.stop, len(self)) if index.stop else len(self),
+                    index.step)
             else:
                 indices = index
             array_out = np.empty((len(indices),) + self.image_shape,
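The rewritten slice handling clamps `index.stop` to the cache length, so an over-long slice such as `cache[5:10**9]` behaves like numpy/h5py slicing instead of producing out-of-range indices. The normalization in isolation:

    import numpy as np

    def slice_indices(index, length):
        # minimal sketch of the normalization in __getitem__ above
        return np.arange(
            index.start or 0,
            min(index.stop, length) if index.stop else length,
            index.step)

    slice_indices(slice(2, 10**9), length=5)    # -> array([2, 3, 4])
    slice_indices(slice(None, None), length=3)  # -> array([0, 1, 2])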
|