dcnum 0.13.2__py3-none-any.whl → 0.23.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dcnum might be problematic. Click here for more details.

Files changed (55)
  1. dcnum/_version.py +2 -2
  2. dcnum/feat/__init__.py +2 -1
  3. dcnum/feat/event_extractor_manager_thread.py +67 -33
  4. dcnum/feat/feat_background/__init__.py +3 -12
  5. dcnum/feat/feat_background/base.py +80 -65
  6. dcnum/feat/feat_background/bg_copy.py +31 -0
  7. dcnum/feat/feat_background/bg_roll_median.py +38 -30
  8. dcnum/feat/feat_background/bg_sparse_median.py +96 -45
  9. dcnum/feat/feat_brightness/__init__.py +1 -0
  10. dcnum/feat/feat_brightness/bright_all.py +41 -6
  11. dcnum/feat/feat_contour/__init__.py +4 -0
  12. dcnum/feat/{feat_moments/mt_legacy.py → feat_contour/moments.py} +32 -8
  13. dcnum/feat/feat_contour/volume.py +174 -0
  14. dcnum/feat/feat_texture/__init__.py +1 -0
  15. dcnum/feat/feat_texture/tex_all.py +28 -1
  16. dcnum/feat/gate.py +92 -70
  17. dcnum/feat/queue_event_extractor.py +139 -70
  18. dcnum/logic/__init__.py +5 -0
  19. dcnum/logic/ctrl.py +794 -0
  20. dcnum/logic/job.py +184 -0
  21. dcnum/logic/json_encoder.py +19 -0
  22. dcnum/meta/__init__.py +1 -0
  23. dcnum/meta/paths.py +30 -0
  24. dcnum/meta/ppid.py +66 -9
  25. dcnum/read/__init__.py +1 -0
  26. dcnum/read/cache.py +109 -77
  27. dcnum/read/const.py +6 -4
  28. dcnum/read/hdf5_data.py +190 -31
  29. dcnum/read/mapped.py +87 -0
  30. dcnum/segm/__init__.py +6 -15
  31. dcnum/segm/segm_thresh.py +7 -14
  32. dcnum/segm/segm_torch/__init__.py +19 -0
  33. dcnum/segm/segm_torch/segm_torch_base.py +125 -0
  34. dcnum/segm/segm_torch/segm_torch_mpo.py +71 -0
  35. dcnum/segm/segm_torch/segm_torch_sto.py +88 -0
  36. dcnum/segm/segm_torch/torch_model.py +95 -0
  37. dcnum/segm/segm_torch/torch_postproc.py +93 -0
  38. dcnum/segm/segm_torch/torch_preproc.py +114 -0
  39. dcnum/segm/segmenter.py +245 -96
  40. dcnum/segm/segmenter_manager_thread.py +39 -28
  41. dcnum/segm/{segmenter_cpu.py → segmenter_mpo.py} +137 -43
  42. dcnum/segm/segmenter_sto.py +110 -0
  43. dcnum/write/__init__.py +3 -1
  44. dcnum/write/deque_writer_thread.py +15 -5
  45. dcnum/write/queue_collector_thread.py +14 -17
  46. dcnum/write/writer.py +225 -55
  47. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/METADATA +4 -2
  48. dcnum-0.23.1.dist-info/RECORD +55 -0
  49. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/WHEEL +1 -1
  50. dcnum/feat/feat_moments/__init__.py +0 -3
  51. dcnum/segm/segmenter_gpu.py +0 -45
  52. dcnum-0.13.2.dist-info/RECORD +0 -40
  53. /dcnum/feat/{feat_moments/ct_opencv.py → feat_contour/contour.py} +0 -0
  54. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/LICENSE +0 -0
  55. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/top_level.txt +0 -0
dcnum/feat/gate.py CHANGED
@@ -1,68 +1,64 @@
1
+ """Feature gating"""
1
2
  import copy
3
+ import numbers
4
+ import warnings
2
5
 
3
6
  import numpy as np
4
7
 
5
- from ..meta.ppid import kwargs_to_ppid
8
+ from ..meta.ppid import kwargs_to_ppid, ppid_to_kwargs
6
9
 
7
10
 
8
11
  class Gate:
9
12
  #: the default value for `size_thresh_mask` if not given as kwarg
10
13
  _default_size_thresh_mask = 10
11
14
 
12
- def __init__(self, data, *, online_gates: bool = False,
15
+ def __init__(self, data, *,
16
+ online_gates: bool = False,
13
17
  size_thresh_mask: int = None):
14
18
  """Gate feature data
15
19
 
16
20
  Parameters
17
21
  ----------
18
22
  data: .HDF5Data
19
- dcevent data instance
23
+ dcnum data instance
20
24
  online_gates: bool
21
- set to True to enable gating with `online_filters`
25
+ set to True to enable gating with "online" gates stored
26
+ in the input file; online gates are applied in real-time
27
+ deformability cytometry before writing data to disk during
28
+ a measurement
22
29
  size_thresh_mask: int
23
30
  Only masks with more pixels than `size_thresh_mask` are
24
- considered to be a valid event; Originally, this
25
- `trig_thresh` value defaulted to 200, but this seemed to
26
- be a little too large, defaults to 10.
31
+ considered to be a valid event; Originally, the
32
+ `bin area min`/`trig_thresh` value defaulted to 200 which is
33
+ too large; defaults to 10 or the original value in case
34
+ `online_gates` is set.
27
35
  """
28
- #: dcevent .HDF5Data instance
29
- self.data = data
30
-
31
- #: gating keyword arguments
32
- self.kwargs = {}
36
+ #: box gating (value range for each feature)
37
+ self.box_gates = {}
33
38
 
34
39
  if online_gates:
35
- self.box_gates = self._compute_online_gates()
36
- # Set triggering threshold to value from source dataset
37
- self._set_kwarg("size_thresh_mask", "online_contour",
38
- "bin area min", size_thresh_mask)
39
- # If the user did not provide a value and there is nothing in the
40
- # original file, then set the default value.
41
- if self.kwargs.get("size_thresh_mask") is None:
42
- self.kwargs["size_thresh_mask"] = \
43
- self._default_size_thresh_mask
44
- else:
45
- self.box_gates = {}
46
- # If the user did not provide a size_thresh_mask, use the default.
40
+ # Deal with online gates.
41
+ # First, compute the box gates.
42
+ self.box_gates.update(self._extract_online_gates(data))
43
+ # If the user did not specify a threshold, attempt to extract
44
+ # it from the metadata.
47
45
  if size_thresh_mask is None:
48
- size_thresh_mask = self._default_size_thresh_mask
49
- self.kwargs["size_thresh_mask"] = size_thresh_mask
50
-
51
- self.kwargs["online_gates"] = online_gates
52
-
53
- def _set_kwarg(self, name, sec, key, user_value):
54
- if user_value is None:
55
- value = self.data.meta_nest.get(sec, {}).get(key)
56
- else:
57
- value = user_value
58
- if value is not None:
59
- self.kwargs[name] = value
46
+ size_thresh_mask = data.meta_nest.get(
47
+ "online_contour", {}).get("bin area min")
60
48
 
61
- def _compute_online_gates(self):
62
- all_online_filters = {}
49
+ #: gating keyword arguments
50
+ self.kwargs = {
51
+ "online_gates": online_gates,
52
+ # Set the size threshold, defaulting to `_default_size_thresh_mask`
53
+ "size_thresh_mask":
54
+ size_thresh_mask or self._default_size_thresh_mask
55
+ }
56
+
57
+ def _extract_online_gates(self, data):
58
+ ogates = {}
63
59
  # Extract online filters from the dataset
64
- of = self.data.meta_nest.get("online_filter", {})
65
- for key in of:
60
+ source_meta = data.meta_nest.get("online_filter", {})
61
+ for key in source_meta:
66
62
  if key.endswith("polygon points"):
67
63
  raise NotImplementedError("Polygon gating not implemented!")
68
64
  elif (key.endswith("soft limit")
@@ -70,25 +66,36 @@ class Gate:
70
66
  # we only want hard gates
71
67
  continue
72
68
  else:
73
- # only add the filter if it is not a soft limit
74
- sl = of.get(f"{key.rsplit(' ', 1)[0]} soft limit", True)
75
- if not sl:
76
- all_online_filters[key] = of[key]
69
+ try:
70
+ feat, lim = key.rsplit(' ', 1)
71
+ lim_idx = ["min", "max"].index(lim)
72
+ except ValueError:
73
+ warnings.warn(f"Unexpected online gate '{key}'")
74
+ else:
75
+ # make sure we are not dealing with a soft limit
76
+ if not source_meta.get(f"{feat} soft limit", True):
77
+ ogates.setdefault(feat, [None, None])
78
+ ogates[feat][lim_idx] = source_meta[key]
77
79
 
78
80
  # This is somehow hard-coded in Shape-In (minimum size is 3px)
79
- px_size = self.data.pixel_size
80
- all_online_filters["size_x min"] = max(
81
- all_online_filters.get("size_x min", 0), 3 * px_size)
82
- all_online_filters["size_y min"] = max(
83
- all_online_filters.get("size_y min", 0), 3 * px_size)
81
+ px_size = data.pixel_size
82
+ ogates["size_x"] = [
83
+ max(ogates.get("size_x min", 0), 3 * px_size), None]
84
+ ogates["size_y"] = [
85
+ max(ogates.get("size_y min", 0), 3 * px_size), None]
84
86
 
85
- return all_online_filters
87
+ return ogates
88
+
89
+ @property
90
+ def features(self):
91
+ """Sorted list of feature gates defined"""
92
+ return sorted(self.box_gates.keys())
86
93
 
87
94
  def get_ppid(self):
88
95
  """Return a unique gating pipeline identifier
89
96
 
90
97
  The pipeline identifier is universally applicable and must
91
- be backwards-compatible (future versions of dcevent will
98
+ be backwards-compatible (future versions of dcnum will
92
99
  correctly acknowledge the ID).
93
100
 
94
101
  The gating pipeline ID is defined as::
@@ -100,10 +107,15 @@ class Gate:
100
107
 
101
108
  online_gates=True^size_thresh_mask=5
102
109
  """
103
- return self.get_ppid_from_kwargs(self.kwargs)
110
+ return self.get_ppid_from_ppkw(self.kwargs)
111
+
112
+ @classmethod
113
+ def get_ppid_code(cls):
114
+ return "norm"
104
115
 
105
116
  @classmethod
106
- def get_ppid_from_kwargs(cls, kwargs):
117
+ def get_ppid_from_ppkw(cls, kwargs):
118
+ """return full pipeline identifier from the given keywords"""
107
119
  # TODO:
108
120
  # If polygon filters are used, the MD5sum should be used and
109
121
  # they should be placed as a log to the output .rtdc file.
@@ -111,27 +123,21 @@ class Gate:
111
123
  if kwargs.get("size_thresh_mask") is None:
112
124
  # Set the default described in init
113
125
  kwargs["size_thresh_mask"] = cls._default_size_thresh_mask
114
- key = cls.key()
126
+ key = cls.get_ppid_code()
115
127
  cback = kwargs_to_ppid(cls, "__init__", kwargs)
116
128
 
117
129
  return ":".join([key, cback])
118
130
 
119
- @property
120
- def features(self):
121
- return [kk.split()[0] for kk in list(self.box_gates.keys())]
122
-
123
- def gate_feature(self, feat, data):
124
- valid_left = True
125
- valid_right = True
126
- if f"{feat} min" in self.box_gates:
127
- valid_left = data > self.box_gates[f"{feat} min"]
128
- if f"{feat} max" in self.box_gates:
129
- valid_right = data < self.box_gates[f"{feat} max"]
130
- return np.logical_and(valid_left, valid_right)
131
-
132
- @classmethod
133
- def key(cls):
134
- return "norm"
131
+ @staticmethod
132
+ def get_ppkw_from_ppid(gate_ppid):
133
+ code, pp_gate_kwargs = gate_ppid.split(":")
134
+ if code != Gate.get_ppid_code():
135
+ raise ValueError(
136
+ f"Could not find gating method '{code}'!")
137
+ kwargs = ppid_to_kwargs(cls=Gate,
138
+ method="__init__",
139
+ ppid=pp_gate_kwargs)
140
+ return kwargs
135
141
 
136
142
  def gate_event(self, event):
137
143
  """Return None if the event should not be used, else `event`"""
@@ -157,6 +163,22 @@ class Gate:
157
163
  raise ValueError("Empty events provided!")
158
164
  return valid
159
165
 
166
+ def gate_feature(self,
167
+ feat: str,
168
+ data: numbers.Number | np.ndarray):
169
+ """Return boolean indicating whether `data` value is in box gate
170
+
171
+ `data` may be a number or an array. If no box filter is defined
172
+ for `feat`, `True` is always returned. Otherwise, either a boolean
173
+ or a boolean array is returned, depending on the type of `data`.
174
+ Note that `np.logical_and` can deal with mixed argument types
175
+ (scalar and array).
176
+ """
177
+ bound_lo, bound_up = self.box_gates[feat]
178
+ valid_lo = data >= bound_lo if bound_lo is not None else True
179
+ valid_up = data <= bound_up if bound_up is not None else True
180
+ return np.logical_and(valid_lo, valid_up)
181
+
160
182
  def gate_mask(self, mask, mask_sum=None):
161
183
  """Gate the mask, return False if the mask should not be used
162
184
 
dcnum/feat/queue_event_extractor.py CHANGED
@@ -1,3 +1,4 @@
1
+ """Feature Extraction: event extractor worker"""
1
2
  import collections
2
3
  import logging
3
4
  from logging.handlers import QueueHandler
@@ -9,11 +10,11 @@ import traceback
9
10
 
10
11
  import numpy as np
11
12
 
12
- from ..meta.ppid import kwargs_to_ppid
13
+ from ..meta.ppid import kwargs_to_ppid, ppid_to_kwargs
13
14
  from ..read import HDF5Data
14
15
 
15
16
  from .feat_brightness import brightness_features
16
- from .feat_moments import moments_based_features
17
+ from .feat_contour import moments_based_features, volume_from_contours
17
18
  from .feat_texture import haralick_texture_features
18
19
  from .gate import Gate
19
20
 
@@ -27,15 +28,17 @@ class QueueEventExtractor:
27
28
  def __init__(self,
28
29
  data: HDF5Data,
29
30
  gate: Gate,
30
- preselect: bool,
31
- ptp_median: float,
32
31
  raw_queue: mp.Queue,
33
32
  event_queue: mp.Queue,
34
33
  log_queue: mp.Queue,
35
34
  feat_nevents: mp.Array,
36
35
  label_array: mp.Array,
37
36
  finalize_extraction: mp.Value,
37
+ invalid_mask_counter: mp.Value,
38
+ worker_monitor: mp.RawArray,
39
+ log_level: int = None,
38
40
  extract_kwargs: dict = None,
41
+ worker_index: int = None,
39
42
  *args, **kwargs):
40
43
  """Base class for event extraction from label images
41
44
 
@@ -44,15 +47,10 @@ class QueueEventExtractor:
44
47
 
45
48
  Parameters
46
49
  ----------
47
- data:
50
+ data: HDF5Data
48
51
  Data source.
49
- gate:
52
+ gate: Gate
50
53
  Gating rules.
51
- preselect:
52
- Whether to perform data preselection based on peak-to-peak
53
- values in the images.
54
- ptp_median:
55
- Median peak-to-peak value in the images for preselction.
56
54
  raw_queue:
57
55
  Queue from which the worker obtains the chunks and
58
56
  indices of the labels to work on.
@@ -70,27 +68,42 @@ class QueueEventExtractor:
70
68
  finalize_extraction:
71
69
  Shared value indicating whether this worker should stop as
72
70
  soon as the `raw_queue` is empty.
71
+ invalid_mask_counter:
72
+ Counts masks labeled as invalid by the feature extractor
73
+ worker_monitor:
74
+ Monitors the frames each worker has processed. Only the
75
+ value in `worker_monitor[worker_index]` is modified.
76
+ log_level:
77
+ Logging level to use
73
78
  extract_kwargs:
74
79
  Keyword arguments for the extraction process. See the
75
80
  keyword-only arguments in
76
81
  :func:`QueueEventExtractor.get_events_from_masks`.
77
-
82
+ worker_index:
83
+ The index to increment values in `worker_monitor`
78
84
  """
79
85
  super(QueueEventExtractor, self).__init__(*args, **kwargs)
86
+ #: Worker index for populating
87
+ self.worker_index = worker_index or 0
80
88
  #: Data instance
81
89
  self.data = data
82
90
  #: Gating information
83
91
  self.gate = gate
84
- #: Whether to perform Preselection
85
- self.preselect = preselect
86
- #: Peak-to-peak median for preselection
87
- self.ptp_median = ptp_median
88
92
  #: queue containing sub-indices for `label_array`
89
93
  self.raw_queue = raw_queue
90
94
  #: queue with event-wise feature dictionaries
91
95
  self.event_queue = event_queue
92
96
  #: queue for logging
93
97
  self.log_queue = log_queue
98
+ #: invalid mask counter
99
+ self.invalid_mask_counter = invalid_mask_counter
100
+ #: worker busy counter
101
+ self.worker_monitor = worker_monitor
102
+ # Logging needs to be set up after `start` is called, otherwise
103
+ # it looks like we have the same PID as the parent process. We
104
+ # are setting up logging in `run`.
105
+ self.logger = None
106
+ self.log_level = log_level or logging.getLogger("dcnum").level
94
107
  #: Shared array of length `len(data)` into which the number of
95
108
  #: events per frame is written.
96
109
  self.feat_nevents = feat_nevents
@@ -105,30 +118,48 @@ class QueueEventExtractor:
105
118
  extract_kwargs.setdefault("haralick", True)
106
119
  #: Feature extraction keyword arguments.
107
120
  self.extract_kwargs = extract_kwargs
108
- # Logging needs to be set up after `start` is called, otherwise
109
- # it looks like we have the same PID as the parent process. We
110
- # are setting up logging in `run`.
111
- self.logger = None
112
121
 
113
122
  @staticmethod
114
- def get_init_kwargs(data, gate, preselect, ptp_median, log_queue):
115
- """You can pass `*args.values()` directly to __init__
123
+ def get_init_kwargs(data: HDF5Data,
124
+ gate: Gate,
125
+ num_extractors: int,
126
+ log_queue: mp.Queue,
127
+ log_level: int = None,
128
+ ):
129
+ """Get initialization arguments for :class:`.QueueEventExtractor`
116
130
 
117
131
  This method was created for convenience reasons:
118
132
  - It makes sure that the order of arguments is correct, since it
119
133
  is implemented in the same class.
120
134
  - It simplifies testing.
135
+
136
+ Parameters
137
+ ----------
138
+ data: HDF5Data
139
+ Input data
140
+ gate: Gate
141
+ Gating class to use
142
+ num_extractors: int
143
+ Number of extractors that will be used
144
+ log_queue: mp.Queue
145
+ Queue the worker uses for sending log messages
146
+ log_level: int
147
+ Logging level to use in the worker process
148
+
149
+ Returns
150
+ -------
151
+ args: dict
152
+ You can pass `*args.values()` directly to `__init__`
121
153
  """
122
154
  # queue with the raw (unsegmented) image data
123
155
  raw_queue = mp_spawn.Queue()
124
156
  # queue with event-wise feature dictionaries
125
157
  event_queue = mp_spawn.Queue()
126
158
 
159
+ # Note that the order must be identical to __init__
127
160
  args = collections.OrderedDict()
128
161
  args["data"] = data
129
162
  args["gate"] = gate
130
- args["preselect"] = preselect
131
- args["ptp_median"] = ptp_median
132
163
  args["raw_queue"] = raw_queue
133
164
  args["event_queue"] = event_queue
134
165
  args["log_queue"] = log_queue
@@ -139,37 +170,54 @@ class QueueEventExtractor:
139
170
  np.ctypeslib.ctypes.c_int16,
140
171
  int(np.prod(data.image.chunk_shape)))
141
172
  args["finalize_extraction"] = mp_spawn.Value("b", False)
173
+ args["invalid_mask_counter"] = mp_spawn.Value("L", 0)
174
+ args["worker_monitor"] = mp_spawn.RawArray("L", num_extractors)
175
+ args["log_level"] = log_level or logging.getLogger("dcnum").level
142
176
  return args
143
177
 
144
- @classmethod
145
- def get_ppid_from_kwargs(cls, kwargs):
146
- """Return the pipeline ID for this event extractor"""
147
- key = "legacy"
148
- cback = kwargs_to_ppid(cls, "get_events_from_masks", kwargs)
149
- return ":".join([key, cback])
150
-
151
178
  def get_events_from_masks(self, masks, data_index, *,
152
179
  brightness: bool = True,
153
180
  haralick: bool = True,
181
+ volume: bool = True,
154
182
  ):
155
183
  """Get events dictionary, performing event-based gating"""
156
184
  events = {"mask": masks}
157
185
  image = self.data.image[data_index][np.newaxis]
158
186
  image_bg = self.data.image_bg[data_index][np.newaxis]
159
187
  image_corr = self.data.image_corr[data_index][np.newaxis]
188
+ if "bg_off" in self.data:
189
+ bg_off = self.data["bg_off"][data_index]
190
+ else:
191
+ bg_off = None
160
192
 
161
193
  events.update(
162
194
  moments_based_features(
163
195
  masks,
164
- pixel_size=self.data.pixel_size))
196
+ pixel_size=self.data.pixel_size,
197
+ ret_contour=volume,
198
+ ))
199
+
165
200
  if brightness:
166
201
  events.update(brightness_features(
167
- mask=masks, image=image, image_bg=image_bg,
202
+ mask=masks,
203
+ image=image,
204
+ image_bg=image_bg,
205
+ bg_off=bg_off,
168
206
  image_corr=image_corr
169
207
  ))
170
208
  if haralick:
171
209
  events.update(haralick_texture_features(
172
- mask=masks, image=image, image_corr=image_corr
210
+ mask=masks,
211
+ image=image,
212
+ image_corr=image_corr,
213
+ ))
214
+
215
+ if volume:
216
+ events.update(volume_from_contours(
217
+ contour=events.pop("contour"), # remove contour from events!
218
+ pos_x=events["pos_x"],
219
+ pos_y=events["pos_y"],
220
+ pixel_size=self.data.pixel_size,
173
221
  ))
174
222
 
175
223
  # gating on feature arrays
@@ -193,8 +241,7 @@ class QueueEventExtractor:
193
241
  # over from gated_events to valid_events. According to our experience
194
242
  # invalid events happen rarely though.
195
243
  if np.any(invalid):
196
- self.logger.info(f"Discarded {np.sum(invalid)} events due to "
197
- "invalid segmentation.")
244
+ self.invalid_mask_counter.value += np.sum(invalid)
198
245
  for key in gated_events:
199
246
  valid_events[key] = gated_events[key][valid]
200
247
  else:
@@ -219,7 +266,7 @@ class QueueEventExtractor:
219
266
  """Return a unique feature extractor pipeline identifier
220
267
 
221
268
  The pipeline identifier is universally applicable and must
222
- be backwards-compatible (future versions of dcevent will
269
+ be backwards-compatible (future versions of dcnum will
223
270
  correctly acknowledge the ID).
224
271
 
225
272
  The feature extractor pipeline ID is defined as::
@@ -236,7 +283,29 @@ class QueueEventExtractor:
236
283
 
237
284
  b=1^h=1
238
285
  """
239
- return self.get_ppid_from_kwargs(self.extract_kwargs)
286
+ return self.get_ppid_from_ppkw(self.extract_kwargs)
287
+
288
+ @classmethod
289
+ def get_ppid_code(cls):
290
+ return "legacy"
291
+
292
+ @classmethod
293
+ def get_ppid_from_ppkw(cls, kwargs):
294
+ """Return the pipeline ID for this event extractor"""
295
+ code = cls.get_ppid_code()
296
+ cback = kwargs_to_ppid(cls, "get_events_from_masks", kwargs)
297
+ return ":".join([code, cback])
298
+
299
+ @staticmethod
300
+ def get_ppkw_from_ppid(extr_ppid):
301
+ code, pp_extr_kwargs = extr_ppid.split(":")
302
+ if code != QueueEventExtractor.get_ppid_code():
303
+ raise ValueError(
304
+ f"Could not find extraction method '{code}'!")
305
+ kwargs = ppid_to_kwargs(cls=QueueEventExtractor,
306
+ method="get_events_from_masks",
307
+ ppid=pp_extr_kwargs)
308
+ return kwargs
240
309
 
241
310
  def process_label(self, label, index):
242
311
  """Process one label image, extracting masks and features"""
@@ -245,11 +314,7 @@ class QueueEventExtractor:
245
314
  # TODO: Do this before segmentation already?
246
315
  # skip events that have been analyzed already
247
316
  return None
248
- if self.preselect:
249
- # TODO: Do this before segmentation already?
250
- ptp = np.ptp(self.data.image_corr[index])
251
- if ptp < 0.1 * self.ptp_median:
252
- return None
317
+
253
318
  masks = self.get_masks_from_label(label)
254
319
  if masks.size:
255
320
  events = self.get_events_from_masks(
@@ -260,19 +325,30 @@ class QueueEventExtractor:
260
325
 
261
326
  def run(self):
262
327
  """Main loop of worker process"""
328
+ self.worker_monitor[self.worker_index] = 0
263
329
  # Don't wait for these two queues when joining workers
264
330
  self.raw_queue.cancel_join_thread()
265
- self.log_queue.cancel_join_thread()
266
331
  #: logger sends all logs to `self.log_queue`
267
332
  self.logger = logging.getLogger(
268
333
  f"dcnum.feat.EventExtractor.{os.getpid()}")
334
+ self.logger.setLevel(self.log_level)
335
+ # Clear any handlers that might be set for this logger. This is
336
+ # important for the case when we are an instance of
337
+ # EventExtractorThread, because then all handlers from the main
338
+ # thread are inherited (as opposed to no handlers in the case
339
+ # of EventExtractorProcess).
340
+ self.logger.handlers.clear()
269
341
  queue_handler = QueueHandler(self.log_queue)
342
+ queue_handler.setLevel(self.log_level)
270
343
  self.logger.addHandler(queue_handler)
271
- self.logger.addFilter(DeduplicatingLoggingFilter())
272
- self.logger.debug(f"Running {self} in PID {os.getpid()}")
344
+ self.logger.info("Ready")
273
345
 
274
346
  mp_array = np.ctypeslib.as_array(
275
347
  self.label_array).reshape(self.data.image.chunk_shape)
348
+
349
+ # only close queues when we have created them ourselves.
350
+ close_queues = isinstance(self, EventExtractorProcess)
351
+
276
352
  while True:
277
353
  try:
278
354
  chunk_index, label_index = self.raw_queue.get(timeout=.03)
@@ -281,8 +357,7 @@ class QueueEventExtractor:
281
357
  if self.finalize_extraction.value:
282
358
  # The manager told us that there is nothing more coming.
283
359
  self.logger.debug(
284
- f"Finalizing worker {self} with PID {os.getpid()}. "
285
- f"{self.event_queue.qsize()} events are still queued.")
360
+ f"Finalizing worker {self} with PID {os.getpid()}")
286
361
  break
287
362
  else:
288
363
  try:
@@ -298,16 +373,24 @@ class QueueEventExtractor:
298
373
  else:
299
374
  self.feat_nevents[index] = 0
300
375
  self.event_queue.put((index, events))
376
+ self.worker_monitor[self.worker_index] += 1
301
377
 
302
378
  self.logger.debug(f"Finalizing `run` for PID {os.getpid()}, {self}")
303
- # Explicitly close the event queue and join it
304
- self.event_queue.close()
305
- self.event_queue.join_thread()
306
- self.logger.debug(f"End of `run` for PID {os.getpid()}, {self}")
307
- # Also close the logging queue. Not that not all messages might
308
- # arrive in the logging queue, since we called `cancel_join_thread`
309
- # earlier.
310
- self.log_queue.close()
379
+ if close_queues:
380
+ # Explicitly close the event queue and join it
381
+ self.event_queue.close()
382
+ self.event_queue.join_thread()
383
+ self.logger.debug(f"End of `run` for PID {os.getpid()}, {self}")
384
+
385
+ # Make sure everything gets written to the queue.
386
+ queue_handler.flush()
387
+
388
+ if close_queues:
389
+ # Also close the logging queue. Note that not all messages might
390
+ # arrive in the logging queue, since we called `cancel_join_thread`
391
+ # earlier.
392
+ self.log_queue.close()
393
+ self.log_queue.join_thread()
311
394
 
312
395
 
313
396
  class EventExtractorProcess(QueueEventExtractor, mp_spawn.Process):
@@ -322,17 +405,3 @@ class EventExtractorThread(QueueEventExtractor, threading.Thread):
322
405
  def __init__(self, *args, **kwargs):
323
406
  super(EventExtractorThread, self).__init__(
324
407
  name="EventExtractorThread", *args, **kwargs)
325
-
326
-
327
- class DeduplicatingLoggingFilter(logging.Filter):
328
- def __init__(self, *args, **kwargs):
329
- super(DeduplicatingLoggingFilter, self).__init__(*args, **kwargs)
330
- self._records = []
331
-
332
- def filter(self, record):
333
- """Return True if the record should be logged"""
334
- msg = record.getMessage()
335
- logged = msg in self._records
336
- if not logged:
337
- self._records.append(msg)
338
- return not logged
dcnum/logic/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ # flake8: noqa: F401
2
+ """Logic for running the dcnum pipeline"""
3
+ from .ctrl import DCNumJobRunner
4
+ from .job import DCNumPipelineJob
5
+ from .json_encoder import ExtendedJSONEncoder