dcnum 0.13.2__py3-none-any.whl → 0.23.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dcnum might be problematic. Click here for more details.
- dcnum/_version.py +2 -2
- dcnum/feat/__init__.py +2 -1
- dcnum/feat/event_extractor_manager_thread.py +67 -33
- dcnum/feat/feat_background/__init__.py +3 -12
- dcnum/feat/feat_background/base.py +80 -65
- dcnum/feat/feat_background/bg_copy.py +31 -0
- dcnum/feat/feat_background/bg_roll_median.py +38 -30
- dcnum/feat/feat_background/bg_sparse_median.py +96 -45
- dcnum/feat/feat_brightness/__init__.py +1 -0
- dcnum/feat/feat_brightness/bright_all.py +41 -6
- dcnum/feat/feat_contour/__init__.py +4 -0
- dcnum/feat/{feat_moments/mt_legacy.py → feat_contour/moments.py} +32 -8
- dcnum/feat/feat_contour/volume.py +174 -0
- dcnum/feat/feat_texture/__init__.py +1 -0
- dcnum/feat/feat_texture/tex_all.py +28 -1
- dcnum/feat/gate.py +92 -70
- dcnum/feat/queue_event_extractor.py +139 -70
- dcnum/logic/__init__.py +5 -0
- dcnum/logic/ctrl.py +794 -0
- dcnum/logic/job.py +184 -0
- dcnum/logic/json_encoder.py +19 -0
- dcnum/meta/__init__.py +1 -0
- dcnum/meta/paths.py +30 -0
- dcnum/meta/ppid.py +66 -9
- dcnum/read/__init__.py +1 -0
- dcnum/read/cache.py +109 -77
- dcnum/read/const.py +6 -4
- dcnum/read/hdf5_data.py +190 -31
- dcnum/read/mapped.py +87 -0
- dcnum/segm/__init__.py +6 -15
- dcnum/segm/segm_thresh.py +7 -14
- dcnum/segm/segm_torch/__init__.py +19 -0
- dcnum/segm/segm_torch/segm_torch_base.py +125 -0
- dcnum/segm/segm_torch/segm_torch_mpo.py +71 -0
- dcnum/segm/segm_torch/segm_torch_sto.py +88 -0
- dcnum/segm/segm_torch/torch_model.py +95 -0
- dcnum/segm/segm_torch/torch_postproc.py +93 -0
- dcnum/segm/segm_torch/torch_preproc.py +114 -0
- dcnum/segm/segmenter.py +245 -96
- dcnum/segm/segmenter_manager_thread.py +39 -28
- dcnum/segm/{segmenter_cpu.py → segmenter_mpo.py} +137 -43
- dcnum/segm/segmenter_sto.py +110 -0
- dcnum/write/__init__.py +3 -1
- dcnum/write/deque_writer_thread.py +15 -5
- dcnum/write/queue_collector_thread.py +14 -17
- dcnum/write/writer.py +225 -55
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/METADATA +4 -2
- dcnum-0.23.1.dist-info/RECORD +55 -0
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/WHEEL +1 -1
- dcnum/feat/feat_moments/__init__.py +0 -3
- dcnum/segm/segmenter_gpu.py +0 -45
- dcnum-0.13.2.dist-info/RECORD +0 -40
- /dcnum/feat/{feat_moments/ct_opencv.py → feat_contour/contour.py} +0 -0
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/LICENSE +0 -0
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/top_level.txt +0 -0
dcnum/_version.py
CHANGED
dcnum/feat/__init__.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
# flake8: noqa: F401
|
|
2
|
-
|
|
2
|
+
"""Feature computation"""
|
|
3
|
+
from . import feat_background, feat_brightness, feat_contour, feat_texture
|
|
3
4
|
from .event_extractor_manager_thread import EventExtractorManagerThread
|
|
4
5
|
from .queue_event_extractor import (
|
|
5
6
|
QueueEventExtractor, EventExtractorThread, EventExtractorProcess
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
"""Feature computation: managing event extraction threads"""
|
|
2
|
+
import collections
|
|
1
3
|
import logging
|
|
2
4
|
import multiprocessing as mp
|
|
3
5
|
import threading
|
|
@@ -16,6 +18,7 @@ class EventExtractorManagerThread(threading.Thread):
|
|
|
16
18
|
labels_list: List,
|
|
17
19
|
fe_kwargs: Dict,
|
|
18
20
|
num_workers: int,
|
|
21
|
+
writer_dq: collections.deque,
|
|
19
22
|
debug: bool = False,
|
|
20
23
|
*args, **kwargs):
|
|
21
24
|
"""Manage event extraction threads or precesses
|
|
@@ -39,9 +42,12 @@ class EventExtractorManagerThread(threading.Thread):
|
|
|
39
42
|
:func:`.EventExtractor.get_init_kwargs` for more information.
|
|
40
43
|
num_workers:
|
|
41
44
|
Number of child threads or worker processes to use.
|
|
45
|
+
writer_dq:
|
|
46
|
+
The queue the writer uses. We monitor this queue. If it
|
|
47
|
+
fills up, we take a break.
|
|
42
48
|
debug:
|
|
43
|
-
Whether to run in debugging mode which means
|
|
44
|
-
|
|
49
|
+
Whether to run in debugging mode which means only one
|
|
50
|
+
event extraction thread (`num_workers` has no effect).
|
|
45
51
|
"""
|
|
46
52
|
super(EventExtractorManagerThread, self).__init__(
|
|
47
53
|
name="EventExtractorManager", *args, **kwargs)
|
|
@@ -65,6 +71,8 @@ class EventExtractorManagerThread(threading.Thread):
|
|
|
65
71
|
self.label_array = np.ctypeslib.as_array(
|
|
66
72
|
self.fe_kwargs["label_array"]).reshape(
|
|
67
73
|
self.data.image.chunk_shape)
|
|
74
|
+
#: Writer deque to monitor
|
|
75
|
+
self.writer_dq = writer_dq
|
|
68
76
|
#: Time counter for feature extraction
|
|
69
77
|
self.t_count = 0
|
|
70
78
|
#: Whether debugging is enabled
|
|
@@ -76,30 +84,52 @@ class EventExtractorManagerThread(threading.Thread):
|
|
|
76
84
|
worker_cls = EventExtractorThread
|
|
77
85
|
else:
|
|
78
86
|
worker_cls = EventExtractorProcess
|
|
79
|
-
workers = [worker_cls(*list(self.fe_kwargs.values()))
|
|
80
|
-
for
|
|
87
|
+
workers = [worker_cls(*list(self.fe_kwargs.values()), worker_index=ii)
|
|
88
|
+
for ii in range(self.num_workers)]
|
|
81
89
|
[w.start() for w in workers]
|
|
90
|
+
worker_monitor = self.fe_kwargs["worker_monitor"]
|
|
82
91
|
|
|
92
|
+
num_slots = len(self.slot_states)
|
|
83
93
|
chunks_processed = 0
|
|
94
|
+
frames_processed = 0
|
|
84
95
|
while True:
|
|
85
|
-
|
|
86
|
-
|
|
96
|
+
# If the writer_dq starts filling up, then this could lead to
|
|
97
|
+
# an oom-kill signal. Stall for the writer to prevent this.
|
|
98
|
+
if (ldq := len(self.writer_dq)) > 1000:
|
|
99
|
+
time.sleep(1)
|
|
100
|
+
ldq2 = len(self.writer_dq)
|
|
101
|
+
stall_time = max(0., (ldq2 - 200) / ((ldq - ldq2) or 1))
|
|
102
|
+
time.sleep(stall_time)
|
|
103
|
+
self.logger.warning(
|
|
104
|
+
f"Stalled {stall_time + 1:.1f}s for slow writer "
|
|
105
|
+
f"({ldq} chunks queued)")
|
|
106
|
+
|
|
87
107
|
unavailable_slots = 0
|
|
108
|
+
found_free_slot = False
|
|
88
109
|
# Check all slots for segmented labels
|
|
89
|
-
while
|
|
90
|
-
#
|
|
91
|
-
#
|
|
92
|
-
#
|
|
93
|
-
#
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
110
|
+
while not found_free_slot:
|
|
111
|
+
# We sort the slots according to the slot chunks so that we
|
|
112
|
+
# always process the slot with the smallest slot chunk number
|
|
113
|
+
# first. Initially, the slot_chunks array is filled with
|
|
114
|
+
# zeros, but the segmenter fills up the slots with the lowest
|
|
115
|
+
# number first.
|
|
116
|
+
for cur_slot in np.argsort(self.slot_chunks):
|
|
117
|
+
# - "e" there is data from the segmenter (the extractor
|
|
118
|
+
# can take it and process it)
|
|
119
|
+
# - "s" the extractor processed the data and is waiting
|
|
120
|
+
# for the segmenter
|
|
121
|
+
if self.slot_states[cur_slot] == "e":
|
|
122
|
+
# The segmenter has something for us in this slot.
|
|
123
|
+
found_free_slot = True
|
|
124
|
+
break
|
|
125
|
+
else:
|
|
126
|
+
# Try another slot.
|
|
127
|
+
unavailable_slots += 1
|
|
128
|
+
cur_slot = (cur_slot + 1) % num_slots
|
|
129
|
+
if unavailable_slots >= num_slots:
|
|
130
|
+
# There is nothing to do, try to avoid 100% CPU
|
|
131
|
+
unavailable_slots = 0
|
|
132
|
+
time.sleep(.1)
|
|
103
133
|
|
|
104
134
|
t1 = time.monotonic()
|
|
105
135
|
|
|
@@ -114,34 +144,38 @@ class EventExtractorManagerThread(threading.Thread):
|
|
|
114
144
|
self.label_array[len(new_labels):] = 0
|
|
115
145
|
else:
|
|
116
146
|
raise ValueError("labels_list contains bad size data!")
|
|
147
|
+
|
|
117
148
|
# Let the workers know there is work
|
|
118
|
-
|
|
119
|
-
|
|
149
|
+
chunk_size = self.data.image.get_chunk_size(chunk)
|
|
150
|
+
[self.raw_queue.put((chunk, ii)) for ii in range(chunk_size)]
|
|
120
151
|
|
|
121
152
|
# Make sure the entire chunk has been processed.
|
|
122
|
-
while
|
|
153
|
+
while np.sum(worker_monitor) != frames_processed + chunk_size:
|
|
123
154
|
time.sleep(.1)
|
|
124
155
|
|
|
125
156
|
# We are done here. The segmenter may continue its deed.
|
|
126
157
|
self.slot_states[cur_slot] = "w"
|
|
127
158
|
|
|
128
|
-
self.logger.debug(f"Extracted
|
|
159
|
+
self.logger.debug(f"Extracted chunk {chunk} in slot {cur_slot}")
|
|
129
160
|
self.t_count += time.monotonic() - t1
|
|
130
161
|
|
|
131
162
|
chunks_processed += 1
|
|
163
|
+
frames_processed += chunk_size
|
|
132
164
|
|
|
133
165
|
if chunks_processed == self.data.image.num_chunks:
|
|
134
166
|
break
|
|
135
167
|
|
|
136
|
-
self.
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
168
|
+
inv_masks = self.fe_kwargs["invalid_mask_counter"].value
|
|
169
|
+
if inv_masks:
|
|
170
|
+
self.logger.info(f"Encountered {inv_masks} invalid masks")
|
|
171
|
+
inv_frac = inv_masks / len(self.data)
|
|
172
|
+
if inv_frac > 0.005: # warn above one half percent
|
|
173
|
+
self.logger.warning(f"Discarded {inv_frac:.1%} of the masks, "
|
|
174
|
+
f"please check segmenter applicability")
|
|
175
|
+
|
|
176
|
+
self.logger.debug("Requesting extraction workers to join")
|
|
144
177
|
self.fe_kwargs["finalize_extraction"].value = True
|
|
145
178
|
[w.join() for w in workers]
|
|
146
|
-
|
|
179
|
+
|
|
180
|
+
self.logger.debug("Finished extraction")
|
|
147
181
|
self.logger.info(f"Extraction time: {self.t_count:.1f}s")
|
|
@@ -1,16 +1,7 @@
|
|
|
1
1
|
# flake8: noqa: F401
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
from .base import Background
|
|
2
|
+
"""Feature computation: background image data from image data"""
|
|
3
|
+
from .base import Background, get_available_background_methods
|
|
5
4
|
# Background methods are registered by importing them here.
|
|
5
|
+
from .bg_copy import BackgroundCopy
|
|
6
6
|
from .bg_roll_median import BackgroundRollMed
|
|
7
7
|
from .bg_sparse_median import BackgroundSparseMed
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
@functools.cache
|
|
11
|
-
def get_available_background_methods():
|
|
12
|
-
"""Return dictionary of background computation methods"""
|
|
13
|
-
methods = {}
|
|
14
|
-
for cls in Background.__subclasses__():
|
|
15
|
-
methods[cls.key()] = cls
|
|
16
|
-
return methods
|
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
import abc
|
|
2
|
+
import functools
|
|
2
3
|
import inspect
|
|
3
4
|
import multiprocessing as mp
|
|
4
5
|
import pathlib
|
|
5
|
-
import uuid
|
|
6
6
|
|
|
7
7
|
import h5py
|
|
8
|
-
import hdf5plugin
|
|
9
|
-
import numpy as np
|
|
10
8
|
|
|
11
9
|
from ...meta import ppid
|
|
12
10
|
from ...read import HDF5Data
|
|
13
|
-
from ...write import create_with_basins
|
|
11
|
+
from ...write import HDF5Writer, create_with_basins, set_default_filter_kwargs
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# All subprocesses should use 'spawn' to avoid issues with threads
|
|
15
|
+
# and 'fork' on POSIX systems.
|
|
16
|
+
mp_spawn = mp.get_context('spawn')
|
|
14
17
|
|
|
15
18
|
|
|
16
19
|
class Background(abc.ABC):
|
|
@@ -53,12 +56,14 @@ class Background(abc.ABC):
|
|
|
53
56
|
self.kwargs.update(kwargs)
|
|
54
57
|
|
|
55
58
|
if num_cpus is None:
|
|
56
|
-
num_cpus =
|
|
59
|
+
num_cpus = mp_spawn.cpu_count()
|
|
57
60
|
#: number of CPUs used
|
|
58
61
|
self.num_cpus = num_cpus
|
|
59
62
|
|
|
60
|
-
#: number of
|
|
61
|
-
self.
|
|
63
|
+
#: number of images in the input data
|
|
64
|
+
self.image_count = None
|
|
65
|
+
#: fraction of images that have been processed
|
|
66
|
+
self.image_proc = mp_spawn.Value("d", 0)
|
|
62
67
|
|
|
63
68
|
#: HDF5Data instance for input data
|
|
64
69
|
self.hdin = None
|
|
@@ -86,12 +91,10 @@ class Background(abc.ABC):
|
|
|
86
91
|
else:
|
|
87
92
|
self.input_data = input_data
|
|
88
93
|
|
|
89
|
-
#: unique identifier
|
|
90
|
-
self.name = str(uuid.uuid4())
|
|
91
94
|
#: shape of event images
|
|
92
95
|
self.image_shape = self.input_data[0].shape
|
|
93
96
|
#: total number of events
|
|
94
|
-
self.
|
|
97
|
+
self.image_count = len(self.input_data)
|
|
95
98
|
|
|
96
99
|
if self.h5out is None:
|
|
97
100
|
if not output_path.exists():
|
|
@@ -102,66 +105,23 @@ class Background(abc.ABC):
|
|
|
102
105
|
# "a", because output file already exists
|
|
103
106
|
self.h5out = h5py.File(output_path, "a", libver="latest")
|
|
104
107
|
|
|
105
|
-
# Initialize
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
compression_kwargs = {}
|
|
110
|
-
h5bg = self.h5out.require_dataset(
|
|
111
|
-
"events/image_bg",
|
|
112
|
-
shape=self.input_data.shape,
|
|
113
|
-
dtype=np.uint8,
|
|
114
|
-
chunks=(min(100, self.event_count),
|
|
115
|
-
self.image_shape[0],
|
|
116
|
-
self.image_shape[1]),
|
|
117
|
-
fletcher32=True,
|
|
118
|
-
**compression_kwargs,
|
|
108
|
+
# Initialize writer
|
|
109
|
+
self.writer = HDF5Writer(
|
|
110
|
+
obj=self.h5out,
|
|
111
|
+
ds_kwds=set_default_filter_kwargs(compression=compress),
|
|
119
112
|
)
|
|
120
|
-
h5bg.attrs.create('CLASS', np.string_('IMAGE'))
|
|
121
|
-
h5bg.attrs.create('IMAGE_VERSION', np.string_('1.2'))
|
|
122
|
-
h5bg.attrs.create('IMAGE_SUBCLASS', np.string_('IMAGE_GRAYSCALE'))
|
|
123
113
|
|
|
124
114
|
def __enter__(self):
|
|
125
115
|
return self
|
|
126
116
|
|
|
127
117
|
def __exit__(self, type, value, tb):
|
|
118
|
+
self.writer.close()
|
|
128
119
|
# Close h5in and h5out
|
|
129
120
|
if self.hdin is not None: # we have an input file
|
|
130
121
|
self.hdin.close() # this closes self.h5in
|
|
131
122
|
if self.h5in is not self.h5out and self.h5out is not None:
|
|
132
123
|
self.h5out.close()
|
|
133
124
|
|
|
134
|
-
@staticmethod
|
|
135
|
-
def get_kwargs_from_ppid(bg_ppid):
|
|
136
|
-
"""Return keyword arguments for any subclass from a PPID string"""
|
|
137
|
-
name, pp_check_user_kwargs = bg_ppid.split(":")
|
|
138
|
-
for cls in Background.__subclasses__():
|
|
139
|
-
if cls.key() == name:
|
|
140
|
-
break
|
|
141
|
-
else:
|
|
142
|
-
raise ValueError(
|
|
143
|
-
f"Could not find background computation method '{name}'!")
|
|
144
|
-
kwargs = ppid.ppid_to_kwargs(cls=cls,
|
|
145
|
-
method="check_user_kwargs",
|
|
146
|
-
ppid=pp_check_user_kwargs)
|
|
147
|
-
return kwargs
|
|
148
|
-
|
|
149
|
-
@classmethod
|
|
150
|
-
def get_ppid_from_kwargs(cls, kwargs):
|
|
151
|
-
"""Return the PPID based on given keyword arguments for a subclass"""
|
|
152
|
-
key = cls.key()
|
|
153
|
-
cback = ppid.kwargs_to_ppid(cls, "check_user_kwargs", kwargs)
|
|
154
|
-
return ":".join([key, cback])
|
|
155
|
-
|
|
156
|
-
@classmethod
|
|
157
|
-
def key(cls):
|
|
158
|
-
if cls is Background:
|
|
159
|
-
raise ValueError("Cannot get `key` for `Background` base class!")
|
|
160
|
-
key = cls.__name__.lower()
|
|
161
|
-
if key.startswith("background"):
|
|
162
|
-
key = key[10:]
|
|
163
|
-
return key
|
|
164
|
-
|
|
165
125
|
@abc.abstractmethod
|
|
166
126
|
def check_user_kwargs(self, **kwargs):
|
|
167
127
|
"""Implement this to check the kwargs during init"""
|
|
@@ -170,7 +130,7 @@ class Background(abc.ABC):
|
|
|
170
130
|
"""Return a unique background pipeline identifier
|
|
171
131
|
|
|
172
132
|
The pipeline identifier is universally applicable and must
|
|
173
|
-
be backwards-compatible (future versions of
|
|
133
|
+
be backwards-compatible (future versions of dcnum will
|
|
174
134
|
correctly acknowledge the ID).
|
|
175
135
|
|
|
176
136
|
The segmenter pipeline ID is defined as::
|
|
@@ -186,17 +146,72 @@ class Background(abc.ABC):
|
|
|
186
146
|
|
|
187
147
|
k=100^b=10000
|
|
188
148
|
"""
|
|
189
|
-
return self.
|
|
149
|
+
return self.get_ppid_from_ppkw(self.kwargs)
|
|
150
|
+
|
|
151
|
+
@classmethod
|
|
152
|
+
def get_ppid_code(cls):
|
|
153
|
+
if cls is Background:
|
|
154
|
+
raise ValueError("Cannot get `key` for `Background` base class!")
|
|
155
|
+
key = cls.__name__.lower()
|
|
156
|
+
if key.startswith("background"):
|
|
157
|
+
key = key[10:]
|
|
158
|
+
return key
|
|
159
|
+
|
|
160
|
+
@classmethod
|
|
161
|
+
def get_ppid_from_ppkw(cls, kwargs):
|
|
162
|
+
"""Return the PPID based on given keyword arguments for a subclass"""
|
|
163
|
+
code = cls.get_ppid_code()
|
|
164
|
+
cback = ppid.kwargs_to_ppid(cls, "check_user_kwargs", kwargs)
|
|
165
|
+
return ":".join([code, cback])
|
|
166
|
+
|
|
167
|
+
@staticmethod
|
|
168
|
+
def get_ppkw_from_ppid(bg_ppid):
|
|
169
|
+
"""Return keyword arguments for any subclass from a PPID string"""
|
|
170
|
+
code, pp_check_user_kwargs = bg_ppid.split(":")
|
|
171
|
+
for bg_code in get_available_background_methods():
|
|
172
|
+
if bg_code == code:
|
|
173
|
+
cls = get_available_background_methods()[bg_code]
|
|
174
|
+
break
|
|
175
|
+
else:
|
|
176
|
+
raise ValueError(
|
|
177
|
+
f"Could not find background computation method '{code}'!")
|
|
178
|
+
kwargs = ppid.ppid_to_kwargs(cls=cls,
|
|
179
|
+
method="check_user_kwargs",
|
|
180
|
+
ppid=pp_check_user_kwargs)
|
|
181
|
+
return kwargs
|
|
182
|
+
|
|
183
|
+
def get_progress(self):
|
|
184
|
+
"""Return progress of background computation, float in [0,1]"""
|
|
185
|
+
if self.image_count == 0:
|
|
186
|
+
return 0.
|
|
187
|
+
else:
|
|
188
|
+
return self.image_proc.value
|
|
190
189
|
|
|
191
190
|
def process(self):
|
|
191
|
+
# Delete any old background data
|
|
192
|
+
for key in ["image_bg", "bg_off"]:
|
|
193
|
+
if key in self.h5out["events"]:
|
|
194
|
+
del self.h5out["events"][key]
|
|
195
|
+
# Perform the actual background computation
|
|
192
196
|
self.process_approach()
|
|
193
|
-
|
|
194
197
|
bg_ppid = self.get_ppid()
|
|
195
|
-
# Store pipeline information in the image_bg feature
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
198
|
+
# Store pipeline information in the image_bg/bg_off feature
|
|
199
|
+
for key in ["image_bg", "bg_off"]:
|
|
200
|
+
if key in self.h5out["events"]:
|
|
201
|
+
self.h5out[f"events/{key}"].attrs["dcnum ppid background"] = \
|
|
202
|
+
bg_ppid
|
|
203
|
+
self.h5out[F"events/{key}"].attrs["dcnum ppid generation"] = \
|
|
204
|
+
ppid.DCNUM_PPID_GENERATION
|
|
199
205
|
|
|
200
206
|
@abc.abstractmethod
|
|
201
207
|
def process_approach(self):
|
|
202
208
|
"""The actual background computation approach"""
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
@functools.cache
|
|
212
|
+
def get_available_background_methods():
|
|
213
|
+
"""Return dictionary of background computation methods"""
|
|
214
|
+
methods = {}
|
|
215
|
+
for cls in Background.__subclasses__():
|
|
216
|
+
methods[cls.get_ppid_code()] = cls
|
|
217
|
+
return methods
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import h5py
|
|
2
|
+
|
|
3
|
+
from .base import Background
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class BackgroundCopy(Background):
|
|
7
|
+
@staticmethod
|
|
8
|
+
def check_user_kwargs():
|
|
9
|
+
pass
|
|
10
|
+
|
|
11
|
+
def process(self):
|
|
12
|
+
"""Copy input data to output dataset"""
|
|
13
|
+
if self.h5in != self.h5out:
|
|
14
|
+
hin = self.hdin.h5
|
|
15
|
+
for feat in ["image_bg", "bg_off"]:
|
|
16
|
+
if feat in hin["events"]:
|
|
17
|
+
h5py.h5o.copy(src_loc=hin["events"].id,
|
|
18
|
+
src_name=feat.encode("utf-8"),
|
|
19
|
+
dst_loc=self.h5out["events"].id,
|
|
20
|
+
dst_name=feat.encode("utf-8"),
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
# set progress to 100%
|
|
24
|
+
self.image_proc.value = 1
|
|
25
|
+
|
|
26
|
+
def process_approach(self):
|
|
27
|
+
# We do the copying in `process`, because we do not want to modify
|
|
28
|
+
# any metadata or delete datasets as is done in the base class.
|
|
29
|
+
# But we still have to implement this method, because it is an
|
|
30
|
+
# abstractmethod in the base class.
|
|
31
|
+
pass
|
|
@@ -1,16 +1,10 @@
|
|
|
1
|
-
import multiprocessing as mp
|
|
2
1
|
import queue
|
|
3
2
|
import time
|
|
4
3
|
|
|
5
4
|
import numpy as np
|
|
6
5
|
from scipy import ndimage
|
|
7
6
|
|
|
8
|
-
from .base import Background
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
# All subprocesses should use 'spawn' to avoid issues with threads
|
|
12
|
-
# and 'fork' on POSIX systems.
|
|
13
|
-
mp_spawn = mp.get_context('spawn')
|
|
7
|
+
from .base import mp_spawn, Background
|
|
14
8
|
|
|
15
9
|
|
|
16
10
|
class BackgroundRollMed(Background):
|
|
@@ -63,6 +57,11 @@ class BackgroundRollMed(Background):
|
|
|
63
57
|
kernel_size=kernel_size,
|
|
64
58
|
batch_size=batch_size)
|
|
65
59
|
|
|
60
|
+
if kernel_size > len(self.input_data):
|
|
61
|
+
raise ValueError(f"Cannot compute background when the input data "
|
|
62
|
+
f"size {len(self.input_data)} is larger than the "
|
|
63
|
+
f"kernel size {kernel_size}!")
|
|
64
|
+
|
|
66
65
|
#: kernel size used for median filtering
|
|
67
66
|
self.kernel_size = kernel_size
|
|
68
67
|
#: number of events processed at once
|
|
@@ -96,12 +95,12 @@ class BackgroundRollMed(Background):
|
|
|
96
95
|
#: queue for median computation jobs
|
|
97
96
|
self.queue = mp_spawn.Queue()
|
|
98
97
|
#: list of workers (processes)
|
|
99
|
-
self.workers = [
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
98
|
+
self.workers = [WorkerRollMed(self.queue,
|
|
99
|
+
self.worker_counter,
|
|
100
|
+
self.shared_input_raw,
|
|
101
|
+
self.shared_output_raw,
|
|
102
|
+
self.batch_size,
|
|
103
|
+
self.kernel_size)
|
|
105
104
|
for _ in range(self.num_cpus)]
|
|
106
105
|
[w.start() for w in self.workers]
|
|
107
106
|
|
|
@@ -120,7 +119,7 @@ class BackgroundRollMed(Background):
|
|
|
120
119
|
"""Check user-defined properties of this class
|
|
121
120
|
|
|
122
121
|
This method primarily exists so that the CLI knows which
|
|
123
|
-
keyword
|
|
122
|
+
keyword arguments can be passed to this class.
|
|
124
123
|
|
|
125
124
|
Parameters
|
|
126
125
|
----------
|
|
@@ -133,7 +132,8 @@ class BackgroundRollMed(Background):
|
|
|
133
132
|
`kernel_size` will not increase computation speed. Larger
|
|
134
133
|
values lead to a higher memory consumption.
|
|
135
134
|
"""
|
|
136
|
-
assert kernel_size > 0
|
|
135
|
+
assert kernel_size > 0, "kernel size must be positive number"
|
|
136
|
+
assert kernel_size % 2 == 0, "kernel size must be even number"
|
|
137
137
|
assert batch_size > kernel_size
|
|
138
138
|
|
|
139
139
|
def get_slices_for_batch(self, batch_index=0):
|
|
@@ -147,9 +147,9 @@ class BackgroundRollMed(Background):
|
|
|
147
147
|
stop_in = (batch_index + 1) * self.batch_size + self.kernel_size
|
|
148
148
|
stop_out = (batch_index + 1) * self.batch_size
|
|
149
149
|
|
|
150
|
-
if stop_in > self.
|
|
151
|
-
stop_in = self.
|
|
152
|
-
stop_out = self.
|
|
150
|
+
if stop_in > self.image_count:
|
|
151
|
+
stop_in = self.image_count
|
|
152
|
+
stop_out = self.image_count - self.kernel_size
|
|
153
153
|
|
|
154
154
|
slice_in = slice(start, stop_in)
|
|
155
155
|
slice_out = slice(start, stop_out)
|
|
@@ -170,16 +170,21 @@ class BackgroundRollMed(Background):
|
|
|
170
170
|
|
|
171
171
|
def process_approach(self):
|
|
172
172
|
"""Perform median computation on entire input data"""
|
|
173
|
-
num_steps = int(np.ceil(self.
|
|
173
|
+
num_steps = int(np.ceil(self.image_count / self.batch_size))
|
|
174
174
|
for ii in range(num_steps):
|
|
175
|
-
print(f"Computing background {ii/num_steps*100:.0f}%",
|
|
176
|
-
end="\r", flush=True)
|
|
177
175
|
self.process_next_batch()
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
176
|
+
|
|
177
|
+
# Set the remaining median bg images to the last one.
|
|
178
|
+
num_remaining = (self.input_data.shape[0]
|
|
179
|
+
- self.h5out["events/image_bg"].shape[0])
|
|
180
|
+
if num_remaining:
|
|
181
|
+
last_image = self.h5out["events/image_bg"][-1]
|
|
182
|
+
last_chunk = np.repeat(
|
|
183
|
+
last_image[np.newaxis],
|
|
184
|
+
num_remaining,
|
|
185
|
+
axis=0)
|
|
186
|
+
self.writer.store_feature_chunk("image_bg", last_chunk)
|
|
187
|
+
self.image_proc.value = 1
|
|
183
188
|
|
|
184
189
|
def process_next_batch(self):
|
|
185
190
|
"""Process one batch of input data"""
|
|
@@ -211,18 +216,21 @@ class BackgroundRollMed(Background):
|
|
|
211
216
|
# TODO:
|
|
212
217
|
# Do this in a different thread so workers can keep going
|
|
213
218
|
# and use a lock somewhere in case the disk is too slow.
|
|
214
|
-
self.
|
|
219
|
+
self.writer.store_feature_chunk(
|
|
220
|
+
"image_bg",
|
|
215
221
|
self.shared_output[:output_size].reshape(output_size,
|
|
216
|
-
*self.image_shape)
|
|
222
|
+
*self.image_shape),
|
|
223
|
+
)
|
|
217
224
|
|
|
218
225
|
self.current_batch += 1
|
|
226
|
+
self.image_proc.value += self.batch_size / self.image_count
|
|
219
227
|
|
|
220
228
|
|
|
221
|
-
class
|
|
229
|
+
class WorkerRollMed(mp_spawn.Process):
|
|
222
230
|
def __init__(self, job_queue, counter, shared_input, shared_output,
|
|
223
231
|
batch_size, kernel_size, *args, **kwargs):
|
|
224
232
|
"""Worker process for median computation"""
|
|
225
|
-
super(
|
|
233
|
+
super(WorkerRollMed, self).__init__(*args, **kwargs)
|
|
226
234
|
self.queue = job_queue
|
|
227
235
|
self.queue.cancel_join_thread()
|
|
228
236
|
self.counter = counter
|