dcnum 0.13.2-py3-none-any.whl → 0.23.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dcnum might be problematic (see the registry page for details).

Files changed (55)
  1. dcnum/_version.py +2 -2
  2. dcnum/feat/__init__.py +2 -1
  3. dcnum/feat/event_extractor_manager_thread.py +67 -33
  4. dcnum/feat/feat_background/__init__.py +3 -12
  5. dcnum/feat/feat_background/base.py +80 -65
  6. dcnum/feat/feat_background/bg_copy.py +31 -0
  7. dcnum/feat/feat_background/bg_roll_median.py +38 -30
  8. dcnum/feat/feat_background/bg_sparse_median.py +96 -45
  9. dcnum/feat/feat_brightness/__init__.py +1 -0
  10. dcnum/feat/feat_brightness/bright_all.py +41 -6
  11. dcnum/feat/feat_contour/__init__.py +4 -0
  12. dcnum/feat/{feat_moments/mt_legacy.py → feat_contour/moments.py} +32 -8
  13. dcnum/feat/feat_contour/volume.py +174 -0
  14. dcnum/feat/feat_texture/__init__.py +1 -0
  15. dcnum/feat/feat_texture/tex_all.py +28 -1
  16. dcnum/feat/gate.py +92 -70
  17. dcnum/feat/queue_event_extractor.py +139 -70
  18. dcnum/logic/__init__.py +5 -0
  19. dcnum/logic/ctrl.py +794 -0
  20. dcnum/logic/job.py +184 -0
  21. dcnum/logic/json_encoder.py +19 -0
  22. dcnum/meta/__init__.py +1 -0
  23. dcnum/meta/paths.py +30 -0
  24. dcnum/meta/ppid.py +66 -9
  25. dcnum/read/__init__.py +1 -0
  26. dcnum/read/cache.py +109 -77
  27. dcnum/read/const.py +6 -4
  28. dcnum/read/hdf5_data.py +190 -31
  29. dcnum/read/mapped.py +87 -0
  30. dcnum/segm/__init__.py +6 -15
  31. dcnum/segm/segm_thresh.py +7 -14
  32. dcnum/segm/segm_torch/__init__.py +19 -0
  33. dcnum/segm/segm_torch/segm_torch_base.py +125 -0
  34. dcnum/segm/segm_torch/segm_torch_mpo.py +71 -0
  35. dcnum/segm/segm_torch/segm_torch_sto.py +88 -0
  36. dcnum/segm/segm_torch/torch_model.py +95 -0
  37. dcnum/segm/segm_torch/torch_postproc.py +93 -0
  38. dcnum/segm/segm_torch/torch_preproc.py +114 -0
  39. dcnum/segm/segmenter.py +245 -96
  40. dcnum/segm/segmenter_manager_thread.py +39 -28
  41. dcnum/segm/{segmenter_cpu.py → segmenter_mpo.py} +137 -43
  42. dcnum/segm/segmenter_sto.py +110 -0
  43. dcnum/write/__init__.py +3 -1
  44. dcnum/write/deque_writer_thread.py +15 -5
  45. dcnum/write/queue_collector_thread.py +14 -17
  46. dcnum/write/writer.py +225 -55
  47. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/METADATA +4 -2
  48. dcnum-0.23.1.dist-info/RECORD +55 -0
  49. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/WHEEL +1 -1
  50. dcnum/feat/feat_moments/__init__.py +0 -3
  51. dcnum/segm/segmenter_gpu.py +0 -45
  52. dcnum-0.13.2.dist-info/RECORD +0 -40
  53. /dcnum/feat/{feat_moments/ct_opencv.py → feat_contour/contour.py} +0 -0
  54. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/LICENSE +0 -0
  55. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/top_level.txt +0 -0
dcnum/_version.py CHANGED
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE

- __version__ = version = '0.13.2'
- __version_tuple__ = version_tuple = (0, 13, 2)
+ __version__ = version = '0.23.1'
+ __version_tuple__ = version_tuple = (0, 23, 1)
dcnum/feat/__init__.py CHANGED
@@ -1,5 +1,6 @@
  # flake8: noqa: F401
- from . import feat_background, feat_brightness, feat_moments, feat_texture
+ """Feature computation"""
+ from . import feat_background, feat_brightness, feat_contour, feat_texture
  from .event_extractor_manager_thread import EventExtractorManagerThread
  from .queue_event_extractor import (
      QueueEventExtractor, EventExtractorThread, EventExtractorProcess
dcnum/feat/event_extractor_manager_thread.py CHANGED
@@ -1,3 +1,5 @@
+ """Feature computation: managing event extraction threads"""
+ import collections
  import logging
  import multiprocessing as mp
  import threading
@@ -16,6 +18,7 @@ class EventExtractorManagerThread(threading.Thread):
                   labels_list: List,
                   fe_kwargs: Dict,
                   num_workers: int,
+                  writer_dq: collections.deque,
                   debug: bool = False,
                   *args, **kwargs):
          """Manage event extraction threads or precesses
@@ -39,9 +42,12 @@ class EventExtractorManagerThread(threading.Thread):
              :func:`.EventExtractor.get_init_kwargs` for more information.
          num_workers:
              Number of child threads or worker processes to use.
+         writer_dq:
+             The queue the writer uses. We monitor this queue. If it
+             fills up, we take a break.
          debug:
-             Whether to run in debugging mode which means more log
-             messages and only one thread (`num_workers` has no effect).
+             Whether to run in debugging mode which means only one
+             event extraction thread (`num_workers` has no effect).
          """
          super(EventExtractorManagerThread, self).__init__(
              name="EventExtractorManager", *args, **kwargs)
@@ -65,6 +71,8 @@ class EventExtractorManagerThread(threading.Thread):
          self.label_array = np.ctypeslib.as_array(
              self.fe_kwargs["label_array"]).reshape(
                  self.data.image.chunk_shape)
+         #: Writer deque to monitor
+         self.writer_dq = writer_dq
          #: Time counter for feature extraction
          self.t_count = 0
          #: Whether debugging is enabled
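The hunk above wires the shared segmentation buffer into the manager: fe_kwargs["label_array"] is a shared ctypes array that np.ctypeslib.as_array exposes as a NumPy view with the chunk's shape. A minimal sketch of that sharing pattern (shape, dtype, and names here are illustrative, not dcnum's actual values):

    # View a shared ctypes buffer as a NumPy array without copying.
    import multiprocessing as mp
    import numpy as np

    chunk_shape = (100, 80, 250)  # hypothetical (frames, height, width)
    raw = mp.RawArray("i", int(np.prod(chunk_shape)))  # shared int32 buffer

    # Segmenter and extractor wrap the same buffer; writes by one side are
    # visible to any process that was handed `raw` (e.g. at Process creation).
    label_array = np.ctypeslib.as_array(raw).reshape(chunk_shape)
    label_array[0, 40, 125] = 1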
@@ -76,30 +84,52 @@
              worker_cls = EventExtractorThread
          else:
              worker_cls = EventExtractorProcess
-         workers = [worker_cls(*list(self.fe_kwargs.values()))
-                    for _ in range(self.num_workers)]
+         workers = [worker_cls(*list(self.fe_kwargs.values()), worker_index=ii)
+                    for ii in range(self.num_workers)]
          [w.start() for w in workers]
+         worker_monitor = self.fe_kwargs["worker_monitor"]

+         num_slots = len(self.slot_states)
          chunks_processed = 0
+         frames_processed = 0
          while True:
-             num_slots = len(self.slot_states)
-             cur_slot = 0
+             # If the writer_dq starts filling up, then this could lead to
+             # an oom-kill signal. Stall for the writer to prevent this.
+             if (ldq := len(self.writer_dq)) > 1000:
+                 time.sleep(1)
+                 ldq2 = len(self.writer_dq)
+                 stall_time = max(0., (ldq2 - 200) / ((ldq - ldq2) or 1))
+                 time.sleep(stall_time)
+                 self.logger.warning(
+                     f"Stalled {stall_time + 1:.1f}s for slow writer "
+                     f"({ldq} chunks queued)")
+
              unavailable_slots = 0
+             found_free_slot = False
              # Check all slots for segmented labels
-             while True:
-                 # - "e" there is data from the segmenter (the extractor
-                 #   can take it and process it)
-                 # - "s" the extractor processed the data and is waiting
-                 #   for the segmenter
-                 if self.slot_states[cur_slot] == "e":
-                     break
-                 else:
-                     unavailable_slots += 1
-                     cur_slot = (cur_slot + 1) % num_slots
-                     if unavailable_slots >= num_slots:
-                         # There is nothing to do, try to avoid 100% CPU
-                         unavailable_slots = 0
-                         time.sleep(.1)
+             while not found_free_slot:
+                 # We sort the slots according to the slot chunks so that we
+                 # always process the slot with the smallest slot chunk number
+                 # first. Initially, the slot_chunks array is filled with
+                 # zeros, but the segmenter fills up the slots with the lowest
+                 # number first.
+                 for cur_slot in np.argsort(self.slot_chunks):
+                     # - "e" there is data from the segmenter (the extractor
+                     #   can take it and process it)
+                     # - "s" the extractor processed the data and is waiting
+                     #   for the segmenter
+                     if self.slot_states[cur_slot] == "e":
+                         # The segmenter has something for us in this slot.
+                         found_free_slot = True
+                         break
+                 else:
+                     # Try another slot.
+                     unavailable_slots += 1
+                     cur_slot = (cur_slot + 1) % num_slots
+                     if unavailable_slots >= num_slots:
+                         # There is nothing to do, try to avoid 100% CPU
+                         unavailable_slots = 0
+                         time.sleep(.1)

              t1 = time.monotonic()

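The new stall logic is a simple backpressure heuristic: sample the writer backlog, sleep one second, sample again, and extrapolate how long the writer needs to drain the queue down to roughly 200 chunks. Worked through with illustrative numbers:

    ldq = 1400   # backlog at first sample (> 1000 triggers the stall)
    # ... time.sleep(1) ...
    ldq2 = 1300  # backlog one second later -> drain rate ~100 chunks/s

    # Time for the backlog to shrink to ~200 chunks at the observed rate;
    # "or 1" guards against division by zero when no progress was made.
    stall_time = max(0., (ldq2 - 200) / ((ldq - ldq2) or 1))
    assert stall_time == 11.0  # (1300 - 200) / 100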
@@ -114,34 +144,38 @@
                  self.label_array[len(new_labels):] = 0
              else:
                  raise ValueError("labels_list contains bad size data!")
+
              # Let the workers know there is work
-             for ii in range(self.data.image.get_chunk_size(chunk)):
-                 self.raw_queue.put((chunk, ii))
+             chunk_size = self.data.image.get_chunk_size(chunk)
+             [self.raw_queue.put((chunk, ii)) for ii in range(chunk_size)]

              # Make sure the entire chunk has been processed.
-             while self.raw_queue.qsize():
+             while np.sum(worker_monitor) != frames_processed + chunk_size:
                  time.sleep(.1)

              # We are done here. The segmenter may continue its deed.
              self.slot_states[cur_slot] = "w"

-             self.logger.debug(f"Extracted one chunk: {chunk}")
+             self.logger.debug(f"Extracted chunk {chunk} in slot {cur_slot}")
              self.t_count += time.monotonic() - t1

              chunks_processed += 1
+             frames_processed += chunk_size

              if chunks_processed == self.data.image.num_chunks:
                  break

-         self.logger.debug("Waiting for event_queue to empty.")
-         # Wait until the event queue is empty.
-         event_queue = self.fe_kwargs["event_queue"]
-         while not event_queue.empty():
-             # The collector thread is still sorting things out. Wait
-             # before joining the threads.
-             time.sleep(.1)
-         self.logger.debug("Requesting extraction workers to join.")
+         inv_masks = self.fe_kwargs["invalid_mask_counter"].value
+         if inv_masks:
+             self.logger.info(f"Encountered {inv_masks} invalid masks")
+             inv_frac = inv_masks / len(self.data)
+             if inv_frac > 0.005:  # warn above one half percent
+                 self.logger.warning(f"Discarded {inv_frac:.1%} of the masks, "
+                                     f"please check segmenter applicability")
+
+         self.logger.debug("Requesting extraction workers to join")
          self.fe_kwargs["finalize_extraction"].value = True
          [w.join() for w in workers]
-         self.logger.debug("Finished extraction.")
+
+         self.logger.debug("Finished extraction")
          self.logger.info(f"Extraction time: {self.t_count:.1f}s")
dcnum/feat/feat_background/__init__.py CHANGED
@@ -1,16 +1,7 @@
  # flake8: noqa: F401
- import functools
-
- from .base import Background
+ """Feature computation: background image data from image data"""
+ from .base import Background, get_available_background_methods
  # Background methods are registered by importing them here.
+ from .bg_copy import BackgroundCopy
  from .bg_roll_median import BackgroundRollMed
  from .bg_sparse_median import BackgroundSparseMed
-
-
- @functools.cache
- def get_available_background_methods():
-     """Return dictionary of background computation methods"""
-     methods = {}
-     for cls in Background.__subclasses__():
-         methods[cls.key()] = cls
-     return methods
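get_available_background_methods moves into base.py (see the base.py diff below), which removes the circular dependency and keeps the registry next to the Background ABC. The pattern itself, reduced to a self-contained sketch with generic names:

    import abc
    import functools

    class Base(abc.ABC):
        @classmethod
        def get_code(cls):
            return cls.__name__.lower()

    # Subclasses register themselves simply by being defined/imported,
    # which is why the imports in this __init__.py matter.
    class Copy(Base):
        pass

    @functools.cache  # computed once, then memoized
    def get_available_methods():
        return {cls.get_code(): cls for cls in Base.__subclasses__()}

    print(get_available_methods())  # {'copy': <class '__main__.Copy'>}

One consequence of functools.cache in this pattern: subclasses imported only after the first call will not appear in the cached dictionary, so the eager imports above matter.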
dcnum/feat/feat_background/base.py CHANGED
@@ -1,16 +1,19 @@
  import abc
+ import functools
  import inspect
  import multiprocessing as mp
  import pathlib
- import uuid

  import h5py
- import hdf5plugin
- import numpy as np

  from ...meta import ppid
  from ...read import HDF5Data
- from ...write import create_with_basins
+ from ...write import HDF5Writer, create_with_basins, set_default_filter_kwargs
+
+
+ # All subprocesses should use 'spawn' to avoid issues with threads
+ # and 'fork' on POSIX systems.
+ mp_spawn = mp.get_context('spawn')


  class Background(abc.ABC):
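Hoisting mp_spawn = mp.get_context('spawn') to base.py gives every background method the same start method; the comment explains why (fork()ing a threaded parent on POSIX can deadlock). A minimal usage sketch of such a context:

    import multiprocessing as mp

    mp_spawn = mp.get_context("spawn")   # Process/Queue/Value factories

    def work(value):
        with value.get_lock():
            value.value += 1

    if __name__ == "__main__":           # required: 'spawn' re-imports __main__
        counter = mp_spawn.Value("i", 0) # cf. Background.image_proc below
        proc = mp_spawn.Process(target=work, args=(counter,))
        proc.start()
        proc.join()
        print(counter.value)             # 1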
@@ -53,12 +56,14 @@ class Background(abc.ABC):
          self.kwargs.update(kwargs)

          if num_cpus is None:
-             num_cpus = mp.cpu_count()
+             num_cpus = mp_spawn.cpu_count()
          #: number of CPUs used
          self.num_cpus = num_cpus

-         #: number of frames
-         self.event_count = None
+         #: number of images in the input data
+         self.image_count = None
+         #: fraction images that have been processed
+         self.image_proc = mp_spawn.Value("d", 0)

          #: HDF5Data instance for input data
          self.hdin = None
@@ -86,12 +91,10 @@ class Background(abc.ABC):
          else:
              self.input_data = input_data

-         #: unique identifier
-         self.name = str(uuid.uuid4())
          #: shape of event images
          self.image_shape = self.input_data[0].shape
          #: total number of events
-         self.event_count = len(self.input_data)
+         self.image_count = len(self.input_data)

          if self.h5out is None:
              if not output_path.exists():
@@ -102,66 +105,23 @@
              # "a", because output file already exists
              self.h5out = h5py.File(output_path, "a", libver="latest")

-         # Initialize background data
-         if compress:
-             compression_kwargs = hdf5plugin.Zstd(clevel=5)
-         else:
-             compression_kwargs = {}
-         h5bg = self.h5out.require_dataset(
-             "events/image_bg",
-             shape=self.input_data.shape,
-             dtype=np.uint8,
-             chunks=(min(100, self.event_count),
-                     self.image_shape[0],
-                     self.image_shape[1]),
-             fletcher32=True,
-             **compression_kwargs,
+         # Initialize writer
+         self.writer = HDF5Writer(
+             obj=self.h5out,
+             ds_kwds=set_default_filter_kwargs(compression=compress),
          )
-         h5bg.attrs.create('CLASS', np.string_('IMAGE'))
-         h5bg.attrs.create('IMAGE_VERSION', np.string_('1.2'))
-         h5bg.attrs.create('IMAGE_SUBCLASS', np.string_('IMAGE_GRAYSCALE'))

      def __enter__(self):
          return self

      def __exit__(self, type, value, tb):
+         self.writer.close()
          # Close h5in and h5out
          if self.hdin is not None:  # we have an input file
              self.hdin.close()  # this closes self.h5in
          if self.h5in is not self.h5out and self.h5out is not None:
              self.h5out.close()

-     @staticmethod
-     def get_kwargs_from_ppid(bg_ppid):
-         """Return keyword arguments for any subclass from a PPID string"""
-         name, pp_check_user_kwargs = bg_ppid.split(":")
-         for cls in Background.__subclasses__():
-             if cls.key() == name:
-                 break
-         else:
-             raise ValueError(
-                 f"Could not find background computation method '{name}'!")
-         kwargs = ppid.ppid_to_kwargs(cls=cls,
-                                      method="check_user_kwargs",
-                                      ppid=pp_check_user_kwargs)
-         return kwargs
-
-     @classmethod
-     def get_ppid_from_kwargs(cls, kwargs):
-         """Return the PPID based on given keyword arguments for a subclass"""
-         key = cls.key()
-         cback = ppid.kwargs_to_ppid(cls, "check_user_kwargs", kwargs)
-         return ":".join([key, cback])
-
-     @classmethod
-     def key(cls):
-         if cls is Background:
-             raise ValueError("Cannot get `key` for `Background` base class!")
-         key = cls.__name__.lower()
-         if key.startswith("background"):
-             key = key[10:]
-         return key
-
      @abc.abstractmethod
      def check_user_kwargs(self, **kwargs):
          """Implement this to check the kwargs during init"""
@@ -170,7 +130,7 @@ class Background(abc.ABC):
          """Return a unique background pipeline identifier

          The pipeline identifier is universally applicable and must
-         be backwards-compatible (future versions of dcevent will
+         be backwards-compatible (future versions of dcnum will
          correctly acknowledge the ID).

          The segmenter pipeline ID is defined as::
@@ -186,17 +146,72 @@

              k=100^b=10000
          """
-         return self.get_ppid_from_kwargs(self.kwargs)
+         return self.get_ppid_from_ppkw(self.kwargs)
+
+     @classmethod
+     def get_ppid_code(cls):
+         if cls is Background:
+             raise ValueError("Cannot get `key` for `Background` base class!")
+         key = cls.__name__.lower()
+         if key.startswith("background"):
+             key = key[10:]
+         return key
+
+     @classmethod
+     def get_ppid_from_ppkw(cls, kwargs):
+         """Return the PPID based on given keyword arguments for a subclass"""
+         code = cls.get_ppid_code()
+         cback = ppid.kwargs_to_ppid(cls, "check_user_kwargs", kwargs)
+         return ":".join([code, cback])
+
+     @staticmethod
+     def get_ppkw_from_ppid(bg_ppid):
+         """Return keyword arguments for any subclass from a PPID string"""
+         code, pp_check_user_kwargs = bg_ppid.split(":")
+         for bg_code in get_available_background_methods():
+             if bg_code == code:
+                 cls = get_available_background_methods()[bg_code]
+                 break
+         else:
+             raise ValueError(
+                 f"Could not find background computation method '{code}'!")
+         kwargs = ppid.ppid_to_kwargs(cls=cls,
+                                      method="check_user_kwargs",
+                                      ppid=pp_check_user_kwargs)
+         return kwargs
+
+     def get_progress(self):
+         """Return progress of background computation, float in [0,1]"""
+         if self.image_count == 0:
+             return 0.
+         else:
+             return self.image_proc.value

      def process(self):
+         # Delete any old background data
+         for key in ["image_bg", "bg_off"]:
+             if key in self.h5out["events"]:
+                 del self.h5out["events"][key]
+         # Perform the actual background computation
          self.process_approach()
-
          bg_ppid = self.get_ppid()
-         # Store pipeline information in the image_bg feature
-         self.h5out["events/image_bg"].attrs["dcnum ppid background"] = bg_ppid
-         self.h5out["events/image_bg"].attrs["dcnum ppid generation"] = \
-             ppid.DCNUM_PPID_GENERATION
+         # Store pipeline information in the image_bg/bg_off feature
+         for key in ["image_bg", "bg_off"]:
+             if key in self.h5out["events"]:
+                 self.h5out[f"events/{key}"].attrs["dcnum ppid background"] = \
+                     bg_ppid
+                 self.h5out[F"events/{key}"].attrs["dcnum ppid generation"] = \
+                     ppid.DCNUM_PPID_GENERATION

      @abc.abstractmethod
      def process_approach(self):
          """The actual background computation approach"""
+
+
+ @functools.cache
+ def get_available_background_methods():
+     """Return dictionary of background computation methods"""
+     methods = {}
+     for cls in Background.__subclasses__():
+         methods[cls.get_ppid_code()] = cls
+     return methods
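The renames make the PPID helpers symmetric: get_ppid_from_ppkw serializes keyword arguments into a "code:kwargs" string and get_ppkw_from_ppid parses it back via the registry. A hypothetical round trip, assuming the rolling-median method and the k=100^b=10000 encoding shown in the docstring above:

    from dcnum.feat.feat_background import (
        BackgroundRollMed, get_available_background_methods)

    ppid_str = BackgroundRollMed.get_ppid_from_ppkw(
        {"kernel_size": 100, "batch_size": 10000})
    # e.g. "rollmed:k=100^b=10000" ("backgroundrollmed" minus the
    # "background" prefix, then the check_user_kwargs encoding)
    print(ppid_str)

    print(BackgroundRollMed.get_ppkw_from_ppid(ppid_str))
    # {'kernel_size': 100, 'batch_size': 10000}

    print(sorted(get_available_background_methods()))
    # e.g. ['copy', 'rollmed', 'sparsemed']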
dcnum/feat/feat_background/bg_copy.py ADDED
@@ -0,0 +1,31 @@
+ import h5py
+
+ from .base import Background
+
+
+ class BackgroundCopy(Background):
+     @staticmethod
+     def check_user_kwargs():
+         pass
+
+     def process(self):
+         """Copy input data to output dataset"""
+         if self.h5in != self.h5out:
+             hin = self.hdin.h5
+             for feat in ["image_bg", "bg_off"]:
+                 if feat in hin["events"]:
+                     h5py.h5o.copy(src_loc=hin["events"].id,
+                                   src_name=feat.encode("utf-8"),
+                                   dst_loc=self.h5out["events"].id,
+                                   dst_name=feat.encode("utf-8"),
+                                   )
+
+         # set progress to 100%
+         self.image_proc.value = 1
+
+     def process_approach(self):
+         # We do the copying in `process`, because we do not want to modify
+         # any metadata or delete datasets as is done in the base class.
+         # But we still have to implement this method, because it is an
+         # abstractmethod in the base class.
+         pass
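BackgroundCopy deliberately overrides process rather than process_approach so the base class never deletes the very datasets it wants to copy. The low-level h5py.h5o.copy clones a dataset together with its attributes, which a manual read/write of the array data would drop. A self-contained sketch of that call (file names and data illustrative):

    import h5py
    import numpy as np

    with h5py.File("in.h5", "w") as src:
        ds = src.create_dataset("events/image_bg",
                                data=np.zeros((10, 80, 250), dtype=np.uint8))
        ds.attrs["dcnum ppid background"] = "copy:"

    with h5py.File("in.h5", "r") as src, h5py.File("out.h5", "w") as dst:
        dst.require_group("events")
        h5py.h5o.copy(src_loc=src["events"].id,
                      src_name=b"image_bg",
                      dst_loc=dst["events"].id,
                      dst_name=b"image_bg")

    with h5py.File("out.h5", "r") as out:
        print(dict(out["events/image_bg"].attrs))  # attribute was preserved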
dcnum/feat/feat_background/bg_roll_median.py CHANGED
@@ -1,16 +1,10 @@
- import multiprocessing as mp
  import queue
  import time

  import numpy as np
  from scipy import ndimage

- from .base import Background
-
-
- # All subprocesses should use 'spawn' to avoid issues with threads
- # and 'fork' on POSIX systems.
- mp_spawn = mp.get_context('spawn')
+ from .base import mp_spawn, Background


  class BackgroundRollMed(Background):
@@ -63,6 +57,11 @@ class BackgroundRollMed(Background):
                           kernel_size=kernel_size,
                           batch_size=batch_size)

+         if kernel_size > len(self.input_data):
+             raise ValueError(f"Cannot compute background when the input data "
+                              f"size {len(self.input_data)} is larger than the "
+                              f"kernel size {kernel_size}!")
+
          #: kernel size used for median filtering
          self.kernel_size = kernel_size
          #: number of events processed at once
@@ -96,12 +95,12 @@ class BackgroundRollMed(Background):
          #: queue for median computation jobs
          self.queue = mp_spawn.Queue()
          #: list of workers (processes)
-         self.workers = [MedianWorker(self.queue,
-                                      self.worker_counter,
-                                      self.shared_input_raw,
-                                      self.shared_output_raw,
-                                      self.batch_size,
-                                      self.kernel_size)
+         self.workers = [WorkerRollMed(self.queue,
+                                       self.worker_counter,
+                                       self.shared_input_raw,
+                                       self.shared_output_raw,
+                                       self.batch_size,
+                                       self.kernel_size)
                          for _ in range(self.num_cpus)]
          [w.start() for w in self.workers]
@@ -120,7 +119,7 @@
          """Check user-defined properties of this class

          This method primarily exists so that the CLI knows which
-         keyword arguements can be passed to this class.
+         keyword arguments can be passed to this class.

          Parameters
          ----------
@@ -133,7 +132,8 @@
              `kernel_size` will not increase computation speed. Larger
              values lead to a higher memory consumption.
          """
-         assert kernel_size > 0
+         assert kernel_size > 0, "kernel size must be positive number"
+         assert kernel_size % 2 == 0, "kernel size must be even number"
          assert batch_size > kernel_size

      def get_slices_for_batch(self, batch_index=0):
@@ -147,9 +147,9 @@
          stop_in = (batch_index + 1) * self.batch_size + self.kernel_size
          stop_out = (batch_index + 1) * self.batch_size

-         if stop_in > self.event_count:
-             stop_in = self.event_count
-             stop_out = self.event_count - self.kernel_size
+         if stop_in > self.image_count:
+             stop_in = self.image_count
+             stop_out = self.image_count - self.kernel_size

          slice_in = slice(start, stop_in)
          slice_out = slice(start, stop_out)
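The slice logic reads batch_size + kernel_size images per batch but writes only batch_size medians, so consecutive output slices tile the file without gaps. Worked through with the docstring's example values (and assuming start = batch_index * batch_size, as the surrounding code suggests):

    batch_size, kernel_size, image_count = 10_000, 100, 25_000

    for batch_index in range(3):
        start = batch_index * batch_size
        stop_in = (batch_index + 1) * batch_size + kernel_size
        stop_out = (batch_index + 1) * batch_size
        if stop_in > image_count:
            stop_in = image_count
            stop_out = image_count - kernel_size
        print(slice(start, stop_in), slice(start, stop_out))

    # slice(0, 10100)     slice(0, 10000)      <- reads 100 extra images
    # slice(10000, 20100) slice(10000, 20000)
    # slice(20000, 25000) slice(20000, 24900)  <- the final 100 medians are
    #                                             padded in process_approach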
@@ -170,16 +170,21 @@

      def process_approach(self):
          """Perform median computation on entire input data"""
-         num_steps = int(np.ceil(self.event_count / self.batch_size))
+         num_steps = int(np.ceil(self.image_count / self.batch_size))
          for ii in range(num_steps):
-             print(f"Computing background {ii/num_steps*100:.0f}%",
-                   end="\r", flush=True)
              self.process_next_batch()
-         # Set the remaining kernel_size median values to the last one
-         last_image = self.h5out["events/image_bg"][-self.kernel_size-1]
-         for ii in range(self.kernel_size):
-             self.h5out["events/image_bg"][self.event_count-ii-1] = last_image
-         print("Computing background 100% ", flush=True)
+
+         # Set the remaining median bg images to the last one.
+         num_remaining = (self.input_data.shape[0]
+                          - self.h5out["events/image_bg"].shape[0])
+         if num_remaining:
+             last_image = self.h5out["events/image_bg"][-1]
+             last_chunk = np.repeat(
+                 last_image[np.newaxis],
+                 num_remaining,
+                 axis=0)
+             self.writer.store_feature_chunk("image_bg", last_chunk)
+         self.image_proc.value = 1

      def process_next_batch(self):
          """Process one batch of input data"""
@@ -211,18 +216,21 @@
          # TODO:
          #  Do this in a different thread so workers can keep going
          #  and use a lock somewhere in case the disk is too slow.
-         self.h5out["events/image_bg"][cur_slice_out] = \
+         self.writer.store_feature_chunk(
+             "image_bg",
              self.shared_output[:output_size].reshape(output_size,
-                                                      *self.image_shape)
+                                                      *self.image_shape),
+         )

          self.current_batch += 1
+         self.image_proc.value += self.batch_size / self.image_count


- class MedianWorker(mp_spawn.Process):
+ class WorkerRollMed(mp_spawn.Process):
      def __init__(self, job_queue, counter, shared_input, shared_output,
                   batch_size, kernel_size, *args, **kwargs):
          """Worker process for median computation"""
-         super(MedianWorker, self).__init__(*args, **kwargs)
+         super(WorkerRollMed, self).__init__(*args, **kwargs)
          self.queue = job_queue
          self.queue.cancel_join_thread()
          self.counter = counter