dcnum 0.23.4-py3-none-any.whl → 0.25.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
The registry flags this release as potentially problematic.
- dcnum/_version.py +2 -2
- dcnum/feat/feat_background/base.py +24 -9
- dcnum/feat/feat_background/bg_sparse_median.py +56 -30
- dcnum/logic/ctrl.py +83 -38
- dcnum/meta/ppid.py +4 -3
- dcnum/read/__init__.py +1 -0
- dcnum/read/cache.py +4 -3
- dcnum/read/detect_flicker.py +44 -0
- dcnum/read/hdf5_data.py +138 -72
- dcnum/read/mapped.py +15 -2
- dcnum/write/__init__.py +1 -1
- dcnum/write/writer.py +122 -21
- {dcnum-0.23.4.dist-info → dcnum-0.25.0.dist-info}/METADATA +1 -1
- {dcnum-0.23.4.dist-info → dcnum-0.25.0.dist-info}/RECORD +17 -16
- {dcnum-0.23.4.dist-info → dcnum-0.25.0.dist-info}/WHEEL +1 -1
- {dcnum-0.23.4.dist-info → dcnum-0.25.0.dist-info}/LICENSE +0 -0
- {dcnum-0.23.4.dist-info → dcnum-0.25.0.dist-info}/top_level.txt +0 -0
dcnum/_version.py
CHANGED

dcnum/feat/feat_background/base.py
CHANGED

@@ -1,8 +1,10 @@
 import abc
 import functools
 import inspect
+import logging
 import multiprocessing as mp
 import pathlib
+import time

 import h5py

@@ -41,8 +43,11 @@ class Background(abc.ABC):
         kwargs:
             Additional keyword arguments passed to the subclass.
         """
+        self.logger = logging.getLogger(
+            f"dcnum.feat.feat_background.{self.__class__.__name__}")
         # proper conversion to Path objects
         output_path = pathlib.Path(output_path)
+        self.output_path = output_path
         if isinstance(input_data, str):
            input_data = pathlib.Path(input_data)
         # kwargs checks

@@ -188,20 +193,30 @@ class Background(abc.ABC):
         return self.image_proc.value

     def process(self):
+        """Perform the background computation
+
+        This irreversibly removes/overrides any "image_bg" and
+        "bg_off" features defined in the output file `self.h5out`.
+        """
+        t0 = time.perf_counter()
         # Delete any old background data
-        for
-
-
+        for ds_key in ["image_bg", "bg_off"]:
+            for grp_key in ["events", "basin_events"]:
+                if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                    del self.h5out[grp_key][ds_key]
         # Perform the actual background computation
         self.process_approach()
         bg_ppid = self.get_ppid()
         # Store pipeline information in the image_bg/bg_off feature
-        for
-
-        self.h5out
-
-
-
+        for ds_key in ["image_bg", "bg_off"]:
+            for grp_key in ["events", "basin_events"]:
+                if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+                    self.h5out[f"{grp_key}/{ds_key}"].attrs[
+                        "dcnum ppid background"] = bg_ppid
+                    self.h5out[F"{grp_key}/{ds_key}"].attrs[
+                        "dcnum ppid generation"] = ppid.DCNUM_PPID_GENERATION
+        self.logger.info(
+            f"Background computation time: {time.perf_counter()-t0:.1f}s")

     @abc.abstractmethod
     def process_approach(self):
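The attribute tagging added in `Background.process()` can be checked directly with h5py. A minimal sketch, assuming an already processed output file (the file name is made up; the attribute names are the ones written in the diff above):

    import h5py

    with h5py.File("output.rtdc", "r") as h5:
        for grp in ("events", "basin_events"):
            for feat in ("image_bg", "bg_off"):
                if grp in h5 and feat in h5[grp]:
                    attrs = h5[f"{grp}/{feat}"].attrs
                    print(grp, feat,
                          attrs.get("dcnum ppid background"),
                          attrs.get("dcnum ppid generation"))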
dcnum/feat/feat_background/bg_sparse_median.py
CHANGED

@@ -1,4 +1,3 @@
-import logging
 import queue
 import time

@@ -9,14 +8,13 @@ from ...read import HDF5Data

 from .base import mp_spawn, Background

-logger = logging.getLogger(__name__)
-

 class BackgroundSparseMed(Background):
     def __init__(self, input_data, output_path, kernel_size=200,
                  split_time=1., thresh_cleansing=0, frac_cleansing=.8,
                  offset_correction=True,
-                 compress=True,
+                 compress=True,
+                 num_cpus=None):
         """Sparse median background correction with cleansing

         In contrast to the rolling median background correction,

@@ -61,7 +59,7 @@ class BackgroundSparseMed(Background):
         offset_correction: bool
             The sparse median background correction produces one median
             image for multiple input frames (BTW this also leads to very
-            efficient data storage with HDF5
+            efficient data storage with internal HDF5 basins). In
             case the input frames are subject to frame-by-frame brightness
             variations (e.g. flickering of the illumination source), it
             is useful to have an offset value per frame that can then be

@@ -79,6 +77,11 @@ class BackgroundSparseMed(Background):
         num_cpus: int
             Number of CPUs to use for median computation. Defaults to
             `multiprocessing.cpu_count()`.
+
+        .. versionchanged:: 0.23.5
+
+            The background image data are stored as an internal
+            mapped basin to reduce the output file size.
         """
         super(BackgroundSparseMed, self).__init__(
             input_data=input_data,

@@ -93,7 +96,7 @@ class BackgroundSparseMed(Background):
         )

         if kernel_size > len(self.input_data):
-            logger.warning(
+            self.logger.warning(
                 f"The kernel size {kernel_size} is too large for input data"
                 f"size {len(self.input_data)}. Setting it to input data size!")
             kernel_size = len(self.input_data)

@@ -126,13 +129,14 @@ class BackgroundSparseMed(Background):
         else:
             # compute time using frame rate (approximate)
             dur = self.image_count / fr * 1.5
-            logger.info(
+            self.logger.info(
+                f"Approximating duration: {dur/60:.1f}min")
             self.time = np.linspace(0, dur, self.image_count,
                                     endpoint=True)
         if self.time is None:
             # No HDF5 file or no information therein; Make an educated guess.
             dur = self.image_count / 3600 * 1.5
-            logger.info(f"Guessing duration: {dur/60:.1f}min")
+            self.logger.info(f"Guessing duration: {dur/60:.1f}min")
             self.time = np.linspace(0, dur, self.image_count,
                                     endpoint=True)

@@ -222,7 +226,7 @@ class BackgroundSparseMed(Background):
         offset_correction: bool
             The sparse median background correction produces one median
             image for multiple input frames (BTW this also leads to very
-            efficient data storage with HDF5
+            efficient data storage with internal HDF5 basins). In
             case the input frames are subject to frame-by-frame brightness
             variations (e.g. flickering of the illumination source), it
             is useful to have an offset value per frame that can then be

@@ -301,18 +305,18 @@ class BackgroundSparseMed(Background):
            thresh = np.quantile(ref, self.frac_cleansing)
            used = ref <= thresh
            frac_remove = np.sum(~used) / used.size
-                logger.warning(
+                self.logger.warning(
                    f"{frac_remove_user:.1%} of the background images would "
                    f"be removed with the current settings, so we enforce "
                    f"`frac_cleansing`. To avoid this warning, try decreasing "
                    f"`thresh_cleansing` or `frac_cleansing`. The new "
                    f"threshold is {thresh_fact / thresh}.")

-            logger.info(f"Cleansed {frac_remove:.2%}")
+            self.logger.info(f"Cleansed {frac_remove:.2%}")
            step_times = self.step_times[used]
            bg_images = self.bg_images[used]
        else:
-            logger.info("Background series cleansing disabled")
+            self.logger.info("Background series cleansing disabled")
            step_times = self.step_times
            bg_images = self.bg_images

@@ -322,35 +326,55 @@ class BackgroundSparseMed(Background):
        idx1 = None
        for ii in range(len(step_times)):
            t1 = step_times[ii]
-            idx1 = np.argmin(np.abs(self.time - t1
+            idx1 = np.argmin(np.abs(self.time - t1 - self.split_time/2))
            bg_idx[idx0:idx1] = ii
            idx0 = idx1
        if idx1 is not None:
            # Fill up remainder of index array with last entry
            bg_idx[idx1:] = ii

-
-
-
-
-
-
-
-
-
-
-
+        # Store the background images as an internal mapped basin
+        self.writer.store_basin(
+            name="background images",
+            description=f"Pipeline identifier: {self.get_ppid()}",
+            mapping=bg_idx,
+            internal_data={"image_bg": bg_images}
+        )
+
+        # store the offset correction, if applicable
+        if self.offset_correction:
+            self.logger.info("Computing offset correction")
+            # compute the mean at the top of all background images
+            sh, sw = self.input_data.shape[1:]
+            roi_full = (slice(None), slice(0, 20), slice(0, sw))
+            bg_data_mean = np.mean(bg_images[roi_full], axis=(1, 2))
+            pos = 0
+            step = self.writer.get_best_nd_chunks(item_shape=(sh, sw),
+                                                  feat_dtype=np.uint8)[0]
+            bg_off = np.zeros(self.image_count, dtype=float)
+            # For every chunk in the input image data, compute that
+            # value as well and store the resulting offset value.
+            # TODO: Could this be parallelized, or are we limited in reading?
+            while pos < self.image_count:
+                stop = min(pos + step, self.image_count)
                # Record background offset correction "bg_off". We take a
                # slice of 20px from the top of the image (there are normally
                # no events here, only the channel walls are visible).
-
-
+                cur_slice = slice(pos, stop)
+                # mean background brightness
+                val_bg = bg_data_mean[bg_idx[cur_slice]]
+                # mean image brightness
                roi_cur = (cur_slice, slice(0, 20), slice(0, sw))
-                val_bg = np.mean(cur_bg_data[roi_full], axis=(1, 2))
                val_dat = np.mean(self.input_data[roi_cur], axis=(1, 2))
                # background image = image_bg + bg_off
-
-
+                bg_off[cur_slice] = val_dat - val_bg
+                # set progress
+                self.image_proc.value = 0.5 * (1 + pos / self.image_count)
+                pos = stop
+            # finally, store the background offset feature
+            self.writer.store_feature_chunk("bg_off", bg_off)
+
+        self.image_proc.value = 1

     def process_second(self,
                        ii: int,

@@ -393,7 +417,9 @@ class BackgroundSparseMed(Background):

        self.bg_images[ii] = self.shared_output.reshape(self.image_shape)

-        self.image_proc.value = idx_stop /
+        self.image_proc.value = idx_stop / (
+            # with offset correction, everything is slower
+            self.image_count * (1 + self.offset_correction))


 class WorkerSparseMed(mp_spawn.Process):
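The sparse median correction now stores one median image per time slot ("image_bg" in the internal basin), a per-event mapping into those images, and a per-event scalar offset "bg_off". A toy sketch of how the effective background of a single frame is composed from these three pieces (array contents are made up):

    import numpy as np

    rng = np.random.default_rng(0)
    image_bg = rng.integers(0, 255, size=(3, 80, 320)).astype(float)  # median images
    bg_idx = np.array([0, 0, 1, 1, 2])              # frame -> median image
    bg_off = np.array([0.5, -0.2, 0.0, 1.1, -0.7])  # per-frame brightness offset

    # effective background of frame 3 = mapped median image + scalar offset
    background_3 = image_bg[bg_idx[3]] + bg_off[3]
    print(background_3.shape)  # (80, 320)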
dcnum/logic/ctrl.py
CHANGED

@@ -403,6 +403,12 @@ class DCNumJobRunner(threading.Thread):
                 features=orig_feats,
                 mapping=None)

+        # Handle basin data according to the user's request
+        self.state = "plumbing"
+        self.task_enforce_basin_strategy()
+
+        self.state = "cleanup"
+
         with HDF5Writer(self.path_temp_out) as hw:
             # pipeline metadata
             hw.h5.attrs["pipeline:dcnum generation"] = self.ppdict["gen_id"]

@@ -462,11 +468,7 @@

             # copy metadata/logs/tables from original file
             with h5py.File(self.job["path_in"]) as h5_src:
-                copy_metadata(h5_src=h5_src,
-                              h5_dst=hw.h5,
-                              # Don't copy basins, we would have to index-map
-                              # them first.
-                              copy_basins=False)
+                copy_metadata(h5_src=h5_src, h5_dst=hw.h5)
             if redo_seg:
                 # Store the correct measurement identifier. This is used to
                 # identify this file as a correct basin in subsequent pipeline

@@ -490,12 +492,6 @@
                 mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
                 hw.h5.attrs["experiment:run identifier"] = mid_new

-        # Handle basin data according to the user's request
-        self.state = "plumbing"
-        self.task_enforce_basin_strategy()
-
-        self.state = "cleanup"
-
         trun = datetime.timedelta(seconds=round(time.monotonic() - time_start))
         self.logger.info(f"Run duration: {str(trun)}")
         self.logger.info(time.strftime("Run stop: %Y-%m-%d-%H.%M.%S",

@@ -547,22 +543,17 @@
         """
         self._progress_bn = 0
         t0 = time.perf_counter()
-        # We
-        #
-        #
-        #
-        #
-        # features
-
-
-
-
-        # (e.g. "temp" or "frame")
-        [self.draw.h5, self.draw.features_scalar_frame, "optional"],
-        # 3. image features from the input file
-        [self.draw.h5, ["image", "image_bg", "bg_off"], "optional"],
-        ]
+        # We have these points to consider:
+        # - We must use the `basinmap` feature to map from the original
+        #   file to the output file.
+        # - We must copy "bg_off" and "image_bg" to the output file.
+        # - For the "drain" basin strategy, we also have to copy all the
+        #   other features.
+        # - If "image_bg" is defined as an internal basin in the input
+        #   file, we have to convert the mapping and store a corresponding
+        #   internal basin in the output file.
+
+        # Determine the basinmap feature
         with HDF5Writer(self.path_temp_out) as hw:
             hout = hw.h5
             # First, we have to determine the basin mapping from input to

@@ -584,14 +575,15 @@
             # to the original input HDF5 file.
             raw_im = self.draw.index_mapping
             if raw_im is None:
-                self.logger.info("Input file mapped with basinmap0")
                 # Create a hard link to save time and space
                 hout["events/basinmap0"] = hout["events/index_unmapped"]
-
+                basinmap0 = idx_um
             else:
-
+                self.logger.info("Converting input mapping")
+                basinmap0 = get_mapping_indices(raw_im)[idx_um]
                 # Store the mapped basin data in the output file.
-                hw.store_feature_chunk("basinmap0",
+                hw.store_feature_chunk("basinmap0", basinmap0)
+            self.logger.info("Input mapped to output with basinmap0")
             # We don't need them anymore.
             del hout["events/index_unmapped"]

@@ -599,19 +591,72 @@
             # is the size of the raw dataset and the latter is its mapped
             # size!
             size_raw = self.draw.h5.attrs["experiment:event count"]
-            if (len(
-                    and np.all(
+            if (len(basinmap0) == size_raw
+                    and np.all(basinmap0 == np.arange(size_raw))):
                 # This means that the images in the input overlap perfectly
                 # with the images in the output, i.e. a "copy" segmenter
                 # was used or something is very reproducible.
                 # We set basinmap to None to be more efficient.
-
+                basinmap0 = None

        else:
            # The input is identical to the output, because we are using
            # the same pipeline identifier.
-
-
+            basinmap0 = None
+
+        # List of features we have to copy from input to output.
+        # We need to make sure that the features are correctly attributed
+        # from the input files. E.g. if the input file already has
+        # background images, but we recompute the background images, then
+        # we have to use the data from the recomputed background file.
+        # We achieve this by keeping a specific order and only copying
+        # those features that we don't already have in the output file.
+        feats_raw = [
+            # background data from the temporary input image
+            [self.dtin.h5, ["bg_off"], "critical"],
+            [self.draw.h5, self.draw.features_scalar_frame, "optional"],
+            [self.draw.h5, ["image", "bg_off"], "optional"],
+        ]
+
+        # Store image_bg as an internal basin, if defined in input
+        for idx in range(len(self.dtin.basins)):
+            bn_dict = self.dtin.basins[idx]
+            if (bn_dict["type"] == "internal"
+                    and "image_bg" in bn_dict["features"]):
+                self.logger.info(
+                    "Copying internal basin background images")
+                bn_grp, bn_feats, bn_map = self.dtin.get_basin_data(idx)
+                assert "image_bg" in bn_feats
+                # Load all images into memory (should only be ~600)
+                bg_images1 = self.dtin.h5["basin_events"]["image_bg"][:]
+                # Get the original internal mapping for these images
+                # Note that `basinmap0` always refers to indices in the
+                # original raw input file, and not to indices in an
+                # optional mapped input file (using `index_mapping`).
+                # Therefore, we do `self.dtin.h5["events"]["basinmap0"]`
+                # instead of `self.dtin["basinmap0"]`
+                basinmap_in = self.dtin.h5["events"][bn_dict["mapping"]][:]
+                # Now we have to convert the indices in `basinmap_in`
+                # to indices in the output file.
+                basinmap1 = basinmap_in[basinmap0]
+                # Store the internal mapping in the output file
+                hw.store_basin(name=bn_dict["name"],
+                               description=bn_dict["description"],
+                               mapping=basinmap1,
+                               internal_data={"image_bg": bg_images1}
+                               )
+                break
+        else:
+            self.logger.info("Background images must be copied")
+            # There is no internal image_bg feature, probably because
+            # the user did not use the sparsemed background correction.
+            # In this case, we simply add "image_bg" to the `feats_raw`.
+            feats_raw += [
+                [self.dtin.h5, ["image_bg"], "critical"],
+                [self.draw.h5, ["image_bg"], "optional"],
+            ]
+
+        # Copy the features required in the output file.
        for hin, feats, importance in feats_raw:
            # Only consider features that are available in the input
            # and that are not already in the output.

@@ -626,7 +671,7 @@
                copy_features(h5_src=hin,
                              h5_dst=hout,
                              features=feats,
-                              mapping=
+                              mapping=basinmap0)
            else:
                # TAP: Create basins for the "optional" features in the
                # output file. Note that the "critical" features never

@@ -638,7 +683,7 @@
                paths = [pin, os.path.relpath(pin, pout)]
                hw.store_basin(name="dcnum basin",
                               features=feats,
-                               mapping=
+                               mapping=basinmap0,
                               paths=paths,
                               description=f"Created with dcnum {version}",
                               )
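The plumbing step composes two index mappings: `basinmap0` maps events in the output file to rows of the raw input file, and the input file's internal mapping maps raw rows to background images. The composition `basinmap_in[basinmap0]` is ordinary integer fancy indexing, sketched here with toy arrays:

    import numpy as np

    # raw input rows that ended up in the output file
    basinmap0 = np.array([0, 2, 2, 5, 7])
    # internal mapping of the input file: raw row -> background image index
    basinmap_in = np.array([0, 0, 0, 1, 1, 1, 2, 2])

    # output event -> background image
    basinmap1 = basinmap_in[basinmap0]
    print(basinmap1)  # [0 0 0 1 2]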
dcnum/meta/ppid.py
CHANGED

@@ -7,10 +7,11 @@ import pathlib
 from typing import Dict, List, Protocol
 import warnings

+import numpy as np

 #: Increment this string if there are breaking changes that make
 #: previous pipelines unreproducible.
-DCNUM_PPID_GENERATION = "
+DCNUM_PPID_GENERATION = "11"


 class ClassWithPPIDCapabilities(Protocol):

@@ -140,9 +141,9 @@ def kwargs_to_ppid(cls: ClassWithPPIDCapabilities,
            path = pathlib.Path(val)
            if path.exists():
                val = path.name
-        if isinstance(val, bool):
+        if isinstance(val, (bool, np.bool_)):
            val = int(val)  # do not print e.g. "True"
-        elif isinstance(val, float):
+        elif isinstance(val, (float, np.floating)):
            if val == int(val):
                val = int(val)  # omit the ".0" at the end
        concat_strings.append(f"{abr}={val}")
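The widened `isinstance` checks matter because values read back from HDF5 attributes are frequently numpy scalars (`np.bool_`, `np.float64`) rather than Python builtins. A standalone illustration of the normalization rules (not a call into dcnum itself):

    import numpy as np

    def normalize(val):
        # same rules as in kwargs_to_ppid
        if isinstance(val, (bool, np.bool_)):
            val = int(val)        # "1" instead of "True"
        elif isinstance(val, (float, np.floating)):
            if val == int(val):
                val = int(val)    # "200" instead of "200.0"
        return val

    print(normalize(np.bool_(True)), normalize(np.float64(200.0)))  # 1 200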
dcnum/read/__init__.py
CHANGED

@@ -1,5 +1,6 @@
 # flake8: noqa: F401
 from .cache import md5sum
 from .const import PROTECTED_FEATURES
+from .detect_flicker import detect_flickering
 from .hdf5_data import HDF5Data, HDF5ImageCache, concatenated_hdf5_data
 from .mapped import get_mapping_indices, get_mapped_object
dcnum/read/cache.py
CHANGED

@@ -36,9 +36,10 @@ class BaseImageChunkCache(abc.ABC):
     def __getitem__(self, index):
         if isinstance(index, (slice, list, np.ndarray)):
             if isinstance(index, slice):
-                indices = np.arange(
-
-
+                indices = np.arange(
+                    index.start or 0,
+                    min(index.stop, len(self)) if index.stop else len(self),
+                    index.step)
             else:
                 indices = index
             array_out = np.empty((len(indices),) + self.image_shape,
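The rebuilt slice handling clamps the stop value to the cache length, so a request like `cache[5:10**9]` no longer produces out-of-range indices. The clamping logic in isolation (the helper name is illustrative):

    import numpy as np

    def slice_indices(index, length):
        # mirrors the np.arange call in BaseImageChunkCache.__getitem__
        return np.arange(
            index.start or 0,
            min(index.stop, length) if index.stop else length,
            index.step)

    print(slice_indices(slice(0, 1000), 50))        # 0 ... 49
    print(slice_indices(slice(None, None, 2), 10))  # 0 2 4 6 8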
dcnum/read/detect_flicker.py
ADDED

@@ -0,0 +1,44 @@
+import numpy as np
+
+from .hdf5_data import HDF5Data
+
+
+def detect_flickering(image_data: np.ndarray | HDF5Data,
+                      roi_height: int = 10,
+                      brightness_threshold: float = 2.5,
+                      count_threshold: int = 5,
+                      max_frames: int = 1000):
+    """Determine whether an image series experiences flickering
+
+    Flickering is an unwelcome phenomenon due to a faulty data
+    acquisition device. For instance, if there is random voltage noise in
+    the electronics managing the LED power, then the brightness of the
+    LED will vary randomly when the noise signal overlaps with the flash
+    triggering signal.
+
+    If flickering is detected, you should use the "sparsemed" background
+    computation with `offset_correction` set to True.
+
+    Parameters
+    ----------
+    image_data:
+        sliceable object (e.g. numpy array or HDF5Data) containing
+        image data.
+    roi_height: int
+        height of the ROI in pixels for which to search for flickering;
+        the entire width of the image is used
+    brightness_threshold: float
+        brightness difference between an individual ROI's median and the
+        median of all ROI medians leading to a positive flickering event
+    count_threshold: int
+        minimum number of flickering events that would lead to a positive
+        flickering decision
+    max_frames: int
+        maximum number of frames to include in the flickering analysis
+    """
+    # slice event axis first in case we have an HDF5Data instance
+    roi_data = image_data[:max_frames][:, :roi_height, :]
+    roi_median = np.median(roi_data, axis=(1, 2))
+    roi_offset = roi_median - np.median(roi_median)
+    flickering_events = np.sum(np.abs(roi_offset) >= abs(brightness_threshold))
+    return flickering_events >= count_threshold
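A minimal usage sketch for the new helper; the synthetic image stack and the file name are made up, and the thresholds are the defaults shown above:

    import numpy as np
    from dcnum.read import HDF5Data, detect_flickering

    # constant brightness -> no flickering expected
    images = np.full((200, 80, 320), 124, dtype=np.uint8)
    print(detect_flickering(images))  # False

    # for an .rtdc file, the image data could be passed via HDF5Data, e.g.
    # data = HDF5Data("measurement.rtdc")
    # print(detect_flickering(data["image"]))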
dcnum/read/hdf5_data.py
CHANGED

@@ -102,10 +102,12 @@ class HDF5Data:
         elif (feat in self.h5["events"]
                 and len(self.h5["events"][feat].shape) == 1):  # cache scalar
             if self.index_mapping is None:
-
+                # no mapping indices, just slice
+                dat_sc = self.h5["events"][feat][:]
             else:
-
-
+                dat_sc = get_mapped_object(self.h5["events"][feat],
+                                           index_mapping=self.index_mapping)[:]
+            self._cache_scalar[feat] = dat_sc
             return self._cache_scalar[feat]
         else:
             if feat in self.h5["events"]:

@@ -117,9 +119,11 @@ class HDF5Data:
             else:
                 # Check the basins
                 for idx in range(len(self.basins)):
-
-                    if
-
+                    bn_grp, bn_feats, bn_map = self.get_basin_data(idx)
+                    if bn_feats and feat in bn_feats:
+                        mapped_ds = get_mapped_object(obj=bn_grp[feat],
+                                                      index_mapping=bn_map)
+                        return mapped_ds
             # If we got here, then the feature data does not exist.
             raise KeyError(f"Feature '{feat}' not found in {self}!")

@@ -200,14 +204,7 @@ class HDF5Data:
                     np.array(h5["tables"][tab][tkey]).reshape(-1)
             self.tables[tab] = tabdict
         # basins
-        basins =
-        for bnkey in h5.get("basins", {}).keys():
-            bn_data = "\n".join(
-                [s.decode() for s in h5["basins"][bnkey][:].tolist()])
-            bn_dict = json.loads(bn_data)
-            if bn_dict["type"] == "file":
-                # we only support file-based basins
-                basins.append(bn_dict)
+        basins = self.extract_basin_dicts(h5)
         self.basins = sorted(basins, key=lambda x: x["name"])

         if state["pixel_size"] is not None:

@@ -273,6 +270,30 @@ class HDF5Data:
             pixel_size = float(f"{pixel_size:.8f}")
             self.meta["imaging:pixel size"] = pixel_size

+    @staticmethod
+    def extract_basin_dicts(h5, check=True):
+        """Return list of basin dictionaries"""
+        # TODO:
+        #  - support iterative mapped basins and catch
+        #    circular basin definitions.
+        basins = []
+        for bnkey in h5.get("basins", {}).keys():
+            bn_data = "\n".join(
+                [s.decode() for s in h5["basins"][bnkey][:].tolist()])
+            bn_dict = json.loads(bn_data)
+            if check:
+                if bn_dict["type"] not in ["internal", "file"]:
+                    # we only support file-based and internal basins
+                    continue
+                basinmap = bn_dict.get("mapping")
+                if basinmap is not None and basinmap not in h5["events"]:
+                    # basinmap feature is missing
+                    continue
+            # Add the basin
+            basins.append(bn_dict)
+
+        return basins
+
     @property
     def features_scalar_frame(self):
         """Scalar features that apply to all events in a frame

@@ -291,9 +312,10 @@ class HDF5Data:

     def close(self):
         """Close the underlying HDF5 file"""
-        for
-            if
-
+        for bn_group, _, _ in self._basin_data.values():
+            if bn_group is not None:
+                if bn_group.id.valid:
+                    bn_group.file.close()
         self._image_cache.clear()
         self._basin_data.clear()
         self.h5.close()

@@ -371,66 +393,110 @@ class HDF5Data:
             raise ValueError(f"Invalid parameter '{var}'!")
         return kwargs

-    def get_basin_data(self, index)
+    def get_basin_data(self, index: int) -> (
+            h5py.Group,
+            List,
+            int | slice | List | np.ndarray,
+    ):
         """Return HDF5Data info for a basin index in `self.basins`

+        Parameters
+        ----------
+        index: int
+            index of the basin from which to get data
+
         Returns
         -------
-
-
+        group: h5py.Group
+            HDF5 group containing HDF5 Datasets with the names
+            listed in `features`
         features: list of str
-
+            list of features made available by this basin
+        index_mapping:
+            a mapping (see `__init__`) that defines mapping from
+            the basin dataset to the referring dataset
         """
         if index not in self._basin_data:
             bn_dict = self.basins[index]
-
-
-
-
-
-
-            # try relative path
-            prel = pathlib.Path(self.path).parent / pp
-            if prel.exists():
-                path = prel
-                break
+
+            # HDF5 group containing the feature data
+            if bn_dict["type"] == "file":
+                h5group, features = self._get_basin_data_file(bn_dict)
+            elif bn_dict["type"] == "internal":
+                h5group, features = self._get_basin_data_internal(bn_dict)
             else:
-
-
-
+                raise ValueError(f"Invalid basin type '{bn_dict['type']}'")
+
+            # index mapping
+            feat_basinmap = bn_dict.get("mapping", None)
+            if feat_basinmap is None:
+                # This is NOT a mapped basin.
+                index_mapping = self.index_mapping
             else:
-
-                if
-                #
-
+                # This is a mapped basin. Create an indexing list.
+                if self.index_mapping is None:
+                    # The current dataset is not mapped.
+                    basinmap_idx = slice(None)
                 else:
-                #
-
-
-
-
-
-                    basinmap_idx = get_mapping_indices(self.index_mapping)
-                basinmap = self.h5[f"events/{feat_basinmap}"]
-                index_mapping = basinmap[basinmap_idx]
-
-            h5dat = HDF5Data(path, index_mapping=index_mapping)
-            features = bn_dict.get("features")
-            if features is None:
-                # Only get the features from the actual HDF5 file.
-                # If this file has basins as well, the basin metadata
-                # should have been copied over to the parent file. This
-                # makes things a little cleaner, because basins are not
-                # nested, but all basins are available in the top file.
-                # See :func:`write.store_metadata` for copying metadata
-                # between files.
-                # The writer can still specify "features" in the basin
-                # metadata, then these basins are indeed nested, and
-                # we consider that ok as well.
-                features = sorted(h5dat.h5["events"].keys())
-            self._basin_data[index] = (h5dat, features)
+                    # The current dataset is also mapped.
+                    basinmap_idx = get_mapping_indices(self.index_mapping)
+                basinmap = self.h5[f"events/{feat_basinmap}"]
+                index_mapping = basinmap[basinmap_idx]
+
+            self._basin_data[index] = (h5group, features, index_mapping)
         return self._basin_data[index]

+    def _get_basin_data_file(self, bn_dict):
+        for ff in bn_dict["paths"]:
+            pp = pathlib.Path(ff)
+            if pp.is_absolute() and pp.exists():
+                path = pp
+                break
+            else:
+                # try relative path
+                prel = pathlib.Path(self.path).parent / pp
+                if prel.exists():
+                    path = prel
+                    break
+        else:
+            path = None
+        if path is None:
+            # Cannot get data from this basin / cannot find file
+            h5group = None
+            features = []
+        else:
+            h5 = h5py.File(path, "r")
+            h5group = h5["events"]
+            # features defined in the basin
+            features = bn_dict.get("features")
+            if features is None:
+                # Only get the features from the actual HDF5 file.
+                # If this file has basins as well, the basin metadata
+                # should have been copied over to the parent file. This
+                # makes things a little cleaner, because basins are not
+                # nested, but all basins are available in the top file.
+                # See :func:`write.store_metadata` for copying metadata
+                # between files.
+                # The writer can still specify "features" in the basin
+                # metadata, then these basins are indeed nested, and
+                # we consider that ok as well.
+                features = sorted(h5group.keys())
+        return h5group, features
+
+    def _get_basin_data_internal(self, bn_dict):
+        # The group name is normally "basin_events"
+        group_name = bn_dict["paths"][0]
+        if group_name != "basin_events":
+            warnings.warn(
+                f"Uncommon group name for basin features: {group_name}")
+        h5group = self.h5[group_name]
+        features = bn_dict.get("features")
+        if features is None:
+            raise ValueError(
+                f"Encountered invalid internal basin '{bn_dict}': "
+                f"'features' must be defined")
+        return h5group, features
+
     def get_image_cache(self, feat):
         """Create an HDF5ImageCache object for the current dataset

@@ -444,15 +510,15 @@ class HDF5Data:
            idx_map = None
            # search all basins
            for idx in range(len(self.basins)):
-
-                if
-                    if feat in
+                bn_grp, bn_feats, bn_map = self.get_basin_data(idx)
+                if bn_feats is not None:
+                    if feat in bn_feats:
                        # HDF5 dataset
-                        ds =
+                        ds = bn_grp[feat]
                        # Index mapping (taken from the basins which
                        # already includes the mapping from the current
                        # instance).
-                        idx_map =
+                        idx_map = bn_map
                        break
            else:
                ds = None

@@ -473,9 +539,9 @@ class HDF5Data:
            features = sorted(self.h5["/events"].keys())
            # add basin features
            for ii in range(len(self.basins)):
-                _,
-                if
-                    features +=
+                _, bn_feats, _ = self.get_basin_data(ii)
+                if bn_feats:
+                    features += bn_feats
            self._keys = sorted(set(features))
        return self._keys
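Internal basins are described by the same JSON records in the "basins" group as file basins, only with `type` set to "internal" and `paths` pointing at a group inside the same file. A sketch of the kind of dictionary `extract_basin_dicts` is expected to return for such a record (the field values are illustrative):

    internal_basin = {
        "name": "background images",
        "description": "Pipeline identifier: ...",
        "format": "h5dataset",
        "type": "internal",
        "paths": ["basin_events"],   # group inside the same HDF5 file
        "features": ["image_bg"],    # mandatory for internal basins
        "mapping": "basinmap0",      # per-event index into basin_events/image_bg
    }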
dcnum/read/mapped.py
CHANGED

@@ -27,8 +27,21 @@ class MappedHDF5Dataset:
         if isinstance(idx, numbers.Integral):
             return self.h5ds[self.mapping_indices[idx]]
         else:
-
-
+            midx = self.mapping_indices[idx]
+            start = np.min(midx)
+            # Add one, because the final index must be included
+            stop = np.max(midx) + 1
+            # We have to perform mapping.
+            # Since h5py is very slow at indexing with arrays,
+            # we instead read the data in chunks from the input file,
+            # and perform the mapping afterward using the numpy arrays.
+            data_in = self.h5ds[start:stop]
+            # Determine the indices that we need from that chunk.
+            data = data_in[midx - start]
+            return data
+
+    def __len__(self):
+        return self.shape[0]


 def get_mapping_indices(
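The new `__getitem__` avoids h5py's slow fancy indexing by reading the smallest contiguous block that covers the requested indices and then selecting from the in-memory array. The same pattern in isolation (the function name is illustrative):

    import numpy as np

    def read_mapped(dataset, mapping_indices, idx):
        midx = mapping_indices[idx]
        start = np.min(midx)
        stop = np.max(midx) + 1      # include the last index
        chunk = dataset[start:stop]  # one contiguous read
        return chunk[midx - start]   # fancy indexing in memory

    data = np.arange(100) * 10
    mapping = np.array([3, 7, 8, 42, 43])
    print(read_mapped(data, mapping, slice(1, 4)))  # [ 70  80 420]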
dcnum/write/__init__.py
CHANGED

@@ -2,5 +2,5 @@
 from .deque_writer_thread import DequeWriterThread
 from .queue_collector_thread import EventStash, QueueCollectorThread
 from .writer import (
-    HDF5Writer, copy_features, copy_metadata, create_with_basins,
+    HDF5Writer, copy_basins, copy_features, copy_metadata, create_with_basins,
     set_default_filter_kwargs)
dcnum/write/writer.py
CHANGED

@@ -1,13 +1,14 @@
 import hashlib
 import json
 import pathlib
-from typing import Dict, List
+from typing import Dict, List, Tuple
 import warnings

 import h5py
 import hdf5plugin
 import numpy as np

+from ..read import HDF5Data
 from .._version import version


@@ -16,6 +17,11 @@ class CreatingFileWithoutBasinWarning(UserWarning):
     pass


+class IgnoringBasinTypeWarning(UserWarning):
+    """Issued when a specific basin type is ignored"""
+    pass
+
+
 class HDF5Writer:
     def __init__(self,
                  # TODO: make this a mandatory argument when `path` is

@@ -90,14 +96,42 @@ class HDF5Writer:
         chunk_size_int = max(10, int(np.floor(chunk_size)))
         return tuple([chunk_size_int] + list(item_shape))

-    def require_feature(self,
-
-
+    def require_feature(self,
+                        feat: str,
+                        item_shape: Tuple[int],
+                        feat_dtype: np.dtype,
+                        ds_kwds: Dict = None,
+                        group_name: str = "events"):
+        """Create a new feature in the "events" group
+
+        Parameters
+        ----------
+        feat: str
+            name of the feature
+        item_shape: Tuple[int]
+            shape for one event of this feature, e.g. for a scalar
+            event, the shape would be `(1,)` and for an image, the
+            shape could be `(80, 300)`.
+        feat_dtype: np.dtype
+            dtype of the feature
+        ds_kwds: Dict
+            HDF5 Dataset keyword arguments (e.g. compression, fletcher32)
+        group_name: str
+            name of the HDF5 group where the feature should be written to;
+            defaults to the "events" group, but a different group can be
+            specified for storing e.g. internal basin features.
+        """
+        if group_name == "events":
+            egroup = self.events
+        else:
+            egroup = self.h5.require_group(group_name)
+
+        if feat not in egroup:
             if ds_kwds is None:
                 ds_kwds = {}
             for key in self.ds_kwds:
                 ds_kwds.setdefault(key, self.ds_kwds[key])
-            dset =
+            dset = egroup.create_dataset(
                 feat,
                 shape=tuple([0] + list(item_shape)),
                 dtype=feat_dtype,

@@ -112,16 +146,17 @@ class HDF5Writer:
                     np.string_('IMAGE_GRAYSCALE'))
             offset = 0
         else:
-            dset =
+            dset = egroup[feat]
             offset = dset.shape[0]
         return dset, offset

     def store_basin(self,
                     name: str,
-                    paths: List[str | pathlib.Path],
+                    paths: List[str | pathlib.Path] | None = None,
                     features: List[str] = None,
                     description: str | None = None,
-                    mapping: np.ndarray = None
+                    mapping: np.ndarray = None,
+                    internal_data: Dict | None = None,
                     ):
         """Write an HDF5-based file basin

@@ -129,8 +164,9 @@ class HDF5Writer:
         ----------
         name: str
             basin name; Names do not have to be unique.
-        paths: list of str or pathlib.Path
-            location(s) of the basin
+        paths: list of str or pathlib.Path or None
+            location(s) of the basin; must be None when storing internal
+            data, a list of paths otherwise
         features: list of str
             list of features provided by `paths`
         description: str

@@ -138,14 +174,39 @@ class HDF5Writer:
         mapping: 1D array
             integer array with indices that map the basin dataset
             to this dataset
+        internal_data: dict of ndarrays
+            internal basin data to store; If this is set, then `features`
+            and `paths` must be set to `None`.
         """
         bdat = {
             "description": description,
-            "format": "hdf5",
             "name": name,
-            "paths": [str(pp) for pp in paths],
-            "type": "file",
         }
+
+        if internal_data:
+            if features is not None:
+                raise ValueError("`features` must be set to None when storing "
+                                 "internal basin features")
+            if paths is not None:
+                raise ValueError("`paths` must be set to None when storing "
+                                 "internal basin features")
+            # store the internal basin information
+            for feat in internal_data:
+                if feat in self.h5.require_group("basin_events"):
+                    raise ValueError(f"Feature '{feat}' is already defined "
+                                     f"as an internal basin feature")
+                self.store_feature_chunk(feat=feat,
+                                         data=internal_data[feat],
+                                         group_name="basin_events")
+            features = sorted(internal_data.keys())
+            bdat["format"] = "h5dataset"
+            bdat["paths"] = ["basin_events"]
+            bdat["type"] = "internal"
+        else:
+            bdat["format"] = "hdf5"
+            bdat["paths"] = [str(pp) for pp in paths]
+            bdat["type"] = "file"
+
         # Explicit features stored in basin file
         if features is not None and len(features):
             bdat["features"] = features

@@ -195,7 +256,7 @@ class HDF5Writer:
                              chunks=True,
                              **self.ds_kwds)

-    def store_feature_chunk(self, feat, data):
+    def store_feature_chunk(self, feat, data, group_name="events"):
         """Store feature data

         The "chunk" implies that always chunks of data are stored,

@@ -205,7 +266,8 @@ class HDF5Writer:
             data = 255 * np.array(data, dtype=np.uint8)
         ds, offset = self.require_feature(feat=feat,
                                           item_shape=data.shape[1:],
-                                          feat_dtype=data.dtype
+                                          feat_dtype=data.dtype,
+                                          group_name=group_name)
         dsize = data.shape[0]
         ds.resize(offset + dsize, axis=0)
         ds[offset:offset + dsize] = data

@@ -291,14 +353,17 @@ def create_with_basins(
             # copy metadata
             with h5py.File(prep, libver="latest") as h5:
                 copy_metadata(h5_src=h5, h5_dst=hw.h5)
+                copy_basins(h5_src=h5, h5_dst=hw.h5)
                 # extract features
                 features = sorted(h5["events"].keys())
+                features = [f for f in features if
+                            not f.startswith("basinmap")]
             name = prep.name
         else:
             features = None
             name = bps[0]

-        #
+        # Write the basin data
         hw.store_basin(name=name,
                        paths=bps,
                        features=features,

@@ -306,6 +371,44 @@ def create_with_basins(
                        )


+def copy_basins(h5_src: h5py.File,
+                h5_dst: h5py.File,
+                internal_basins: bool = True
+                ):
+    """Reassemble basin data in the output file
+
+    This does not just copy the datasets defined in the "basins"
+    group, but it also loads the "basinmap?" features and stores
+    them as new "basinmap?" features in the output file.
+    """
+    basins = HDF5Data.extract_basin_dicts(h5_src, check=False)
+    hw = HDF5Writer(h5_dst)
+    for bn_dict in basins:
+        if bn_dict["type"] == "internal" and internal_basins:
+            internal_data = {}
+            for feat in bn_dict["features"]:
+                internal_data[feat] = h5_src["basin_events"][feat]
+            hw.store_basin(name=bn_dict["name"],
+                           description=bn_dict["description"],
+                           mapping=h5_src["events"][bn_dict["mapping"]][:],
+                           internal_data=internal_data,
+                           )
+        elif bn_dict["type"] == "file":
+            if bn_dict.get("mapping") is not None:
+                mapping = h5_src["events"][bn_dict["mapping"]][:]
+            else:
+                mapping = None
+            hw.store_basin(name=bn_dict["name"],
+                           description=bn_dict["description"],
+                           paths=bn_dict["paths"],
+                           features=bn_dict["features"],
+                           mapping=mapping,
+                           )
+        else:
+            warnings.warn(f"Ignored basin of type '{bn_dict['type']}'",
+                          IgnoringBasinTypeWarning)
+
+
 def copy_features(h5_src: h5py.File,
                   h5_dst: h5py.File,
                   features: List[str],

@@ -370,9 +473,9 @@ def copy_features(h5_src: h5py.File,


 def copy_metadata(h5_src: h5py.File,
-                  h5_dst: h5py.File
-
-    """Copy attributes, tables,
+                  h5_dst: h5py.File
+                  ):
+    """Copy attributes, tables, and logs from one H5File to another

     Notes
     -----

@@ -386,8 +489,6 @@ def copy_metadata(h5_src: h5py.File,
     for kk in src_attrs:
         h5_dst.attrs.setdefault(kk, src_attrs[kk])
     copy_data = ["logs", "tables"]
-    if copy_basins:
-        copy_data.append("basins")
     # copy other metadata
     for topic in copy_data:
         if topic in h5_src:
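With the new `internal_data` argument, `store_basin` writes the arrays into a "basin_events" group and records an internal basin pointing at them; `paths` and `features` must then stay `None`. A hedged usage sketch (file name and array contents are made up):

    import numpy as np
    from dcnum.write import HDF5Writer

    bg_images = np.zeros((3, 80, 320), dtype=np.uint8)  # three median images
    bg_idx = np.array([0, 0, 1, 1, 2, 2])               # one entry per event

    with HDF5Writer("output.rtdc") as hw:
        hw.store_basin(
            name="background images",
            description="sparsemed background",
            mapping=bg_idx,                        # event -> background image
            internal_data={"image_bg": bg_images},
            # paths and features must remain None for internal basins
        )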
{dcnum-0.23.4.dist-info → dcnum-0.25.0.dist-info}/RECORD
CHANGED

@@ -1,14 +1,14 @@
 dcnum/__init__.py,sha256=hcawIKS7utYiOyVhOAX9t7K3xYzP1b9862VV0b6qSrQ,74
-dcnum/_version.py,sha256=
+dcnum/_version.py,sha256=nhq2LDshqyJE9A3DLIz_6mBkgGLKSQrYmHfzZ_jVCbc,413
 dcnum/feat/__init__.py,sha256=jUJYWTD3VIoDNKrmryXbjHb1rGwYtK4b7VPWihYgUoo,325
 dcnum/feat/event_extractor_manager_thread.py,sha256=FAxSyRfaNAuBWNplxHngp5h-44s0qIP24XX_oETdfMk,7836
 dcnum/feat/gate.py,sha256=Yhxq80JoRMmQzBxl35C8NT91c9QcmQa-EIKLuxK6WvE,7221
 dcnum/feat/queue_event_extractor.py,sha256=0ncTQleT1sfc98zYkFuZWxU-akecfTrW6-OOU3z-d8o,15698
 dcnum/feat/feat_background/__init__.py,sha256=OTmMuazHNaSrZb2XW4cnJ6PlgJLbKrPbaidpEixYa0A,341
-dcnum/feat/feat_background/base.py,sha256=
+dcnum/feat/feat_background/base.py,sha256=bQBPvztrku-8YSVk8YBUUNh7MaYcnztgyD2-dQHxpzw,8674
 dcnum/feat/feat_background/bg_copy.py,sha256=PK8x4_Uph-_A6uszZC5uhe1gD1dSRdHnDMEsN0HSGHA,1034
 dcnum/feat/feat_background/bg_roll_median.py,sha256=EyjstMDXFBYuJB1lN6g4Uw7tPm434X3hXQxKSqvcoJ4,13175
-dcnum/feat/feat_background/bg_sparse_median.py,sha256=
+dcnum/feat/feat_background/bg_sparse_median.py,sha256=wt7lvPSiZkdaerRErgezd_YDVxHA2kAqO1LMX9PuHJk,22053
 dcnum/feat/feat_brightness/__init__.py,sha256=o6AebVlmydwNgVF5kW6ITqJyFreoKrU3Ki_3EC8If-s,155
 dcnum/feat/feat_brightness/bright_all.py,sha256=vf8xaYBdKD24hHUXdkI0_S7nbr7m49KW6gvuWvbHDVg,4545
 dcnum/feat/feat_brightness/common.py,sha256=JX49EszYDmnvoOKXFVV1CalEIWRmOuY5EryNbqGbdac,156

@@ -20,17 +20,18 @@ dcnum/feat/feat_texture/__init__.py,sha256=6StM9S540UVtdFFR3bHa7nfCTomeVdoo7Uy9C
 dcnum/feat/feat_texture/common.py,sha256=COXHpXS-7DMouGu3WF83I76L02Sr7P9re4lxajh6g0E,439
 dcnum/feat/feat_texture/tex_all.py,sha256=_5H3sXYRN0Uq2eUHn3XUyEHkU_tncEqbqJTC-HZcnGY,5198
 dcnum/logic/__init__.py,sha256=7J3GrwJInNQbrLk61HRIV7X7p69TAIbMYpR34hh6u14,177
-dcnum/logic/ctrl.py,sha256=
+dcnum/logic/ctrl.py,sha256=3ovEkaxy4ASpXvLsZMO0pSueqZfQgLkoy3fnH2Gweq8,38791
 dcnum/logic/job.py,sha256=9BN2WjYqjjJuLnfNZAtQ2Nn47Glo2jVrivDodGJoqlQ,7713
 dcnum/logic/json_encoder.py,sha256=cxMnqisbKEVf-rVcw6rK2BBAb6iz_hKFaGl81kK36lQ,571
 dcnum/meta/__init__.py,sha256=AVqRgyKXO1orKnE305h88IBvoZ1oz6X11HN1WP5nGvg,60
 dcnum/meta/paths.py,sha256=J_ikeHzd7gEeRgAKjuayz3x6q4h1fOiDadM-ZxhAGm4,1053
-dcnum/meta/ppid.py,sha256=
-dcnum/read/__init__.py,sha256=
-dcnum/read/cache.py,sha256=
+dcnum/meta/ppid.py,sha256=RnDkJSdV1kDznAsOhQN5WI7uC9UwSMCjyADP7yWNvkM,8478
+dcnum/read/__init__.py,sha256=LYHyZHgiNTpjV5oEcty-7Kh5topLpHT_cFlNl-QX8gg,262
+dcnum/read/cache.py,sha256=LNA5nnDyrw8Nj07E7XfG2GcHEoWm6vA6Qo_8N-n-sGw,6492
 dcnum/read/const.py,sha256=GG9iyXDtEldvJYOBnhZjlimzIeBMAt4bSr2-xn2gzzc,464
-dcnum/read/
-dcnum/read/
+dcnum/read/detect_flicker.py,sha256=CeUyxI6LaX_lCNvBPm_yzsiWmiNcZYqbNZCtvKPdkcU,1827
+dcnum/read/hdf5_data.py,sha256=JVk9YWw1rPgTPxaMZsw2ehk4FJq9UqhmB1SW7yhPw50,25867
+dcnum/read/mapped.py,sha256=zU2fYdZfLNHn0rKHxDzBhNFMu4--WWa8nSeE2likyZA,3637
 dcnum/segm/__init__.py,sha256=9cLEAd3JWE8IGqDHV-eSDIYOGBfOepd8OcebtNs8Omk,309
 dcnum/segm/segm_thresh.py,sha256=iVhvIhzO0Gw0t3rXOgH71rOI0CNjJJQq4Gg6BulUhK8,948
 dcnum/segm/segmenter.py,sha256=FWLFDBR-x_85ku2rObA2F-QBrM4IUaUL-YHChLagVvM,14902

@@ -44,12 +45,12 @@ dcnum/segm/segm_torch/segm_torch_sto.py,sha256=PTOJrP_FkaxZZul8lM4VA2HL3KyxrheDD
 dcnum/segm/segm_torch/torch_model.py,sha256=5aL6SwSvg1N2gATEGBhP3aA4WTHlvGzQVYuizmh0LrU,3187
 dcnum/segm/segm_torch/torch_postproc.py,sha256=ctirQTmsZnuZGIxkwFWN9arRneHRYJUxaJ_ZyCgjByM,3311
 dcnum/segm/segm_torch/torch_preproc.py,sha256=kjabu76paw23kO7RP7Ik6IY60Kk1VBAHKBAedflA0aQ,4002
-dcnum/write/__init__.py,sha256=
+dcnum/write/__init__.py,sha256=sK79IlvCFIqf2oFABVeyYedMnHOsEIQpxAauEeNO-Tw,273
 dcnum/write/deque_writer_thread.py,sha256=ao7F1yrVKyufgC4rC0Y2_Vt7snuT6KpI7W2qVxcjdhk,1994
 dcnum/write/queue_collector_thread.py,sha256=d_WfdsZdFnFsiAY0zVMwUlA4juIMeiWYmE_-rezBQCE,11734
-dcnum/write/writer.py,sha256=
-dcnum-0.
-dcnum-0.
-dcnum-0.
-dcnum-0.
-dcnum-0.
+dcnum/write/writer.py,sha256=sFt4O9O4uUabxmBZu776b5pBf_DPDpsz-jLpSsswQ1g,20531
+dcnum-0.25.0.dist-info/LICENSE,sha256=YRChA1C8A2E-amJbudwMcbTCZy_HzmeY0hMIvduh1MM,1089
+dcnum-0.25.0.dist-info/METADATA,sha256=CNZS4i26iGkFnwde0p0I8xzD80cRqKrDRQssMXnqkSo,2280
+dcnum-0.25.0.dist-info/WHEEL,sha256=Wyh-_nZ0DJYolHNn1_hMa4lM7uDedD_RGVwbmTjyItk,91
+dcnum-0.25.0.dist-info/top_level.txt,sha256=Hmh38rgG_MFTVDpUDGuO2HWTSq80P585Het4COQzFTg,6
+dcnum-0.25.0.dist-info/RECORD,,

{dcnum-0.23.4.dist-info → dcnum-0.25.0.dist-info}/LICENSE
File without changes

{dcnum-0.23.4.dist-info → dcnum-0.25.0.dist-info}/top_level.txt
File without changes