dcnum 0.13.2__py3-none-any.whl → 0.23.1__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.

Potentially problematic release.


This version of dcnum might be problematic. Click here for more details.

Files changed (55) hide show
  1. dcnum/_version.py +2 -2
  2. dcnum/feat/__init__.py +2 -1
  3. dcnum/feat/event_extractor_manager_thread.py +67 -33
  4. dcnum/feat/feat_background/__init__.py +3 -12
  5. dcnum/feat/feat_background/base.py +80 -65
  6. dcnum/feat/feat_background/bg_copy.py +31 -0
  7. dcnum/feat/feat_background/bg_roll_median.py +38 -30
  8. dcnum/feat/feat_background/bg_sparse_median.py +96 -45
  9. dcnum/feat/feat_brightness/__init__.py +1 -0
  10. dcnum/feat/feat_brightness/bright_all.py +41 -6
  11. dcnum/feat/feat_contour/__init__.py +4 -0
  12. dcnum/feat/{feat_moments/mt_legacy.py → feat_contour/moments.py} +32 -8
  13. dcnum/feat/feat_contour/volume.py +174 -0
  14. dcnum/feat/feat_texture/__init__.py +1 -0
  15. dcnum/feat/feat_texture/tex_all.py +28 -1
  16. dcnum/feat/gate.py +92 -70
  17. dcnum/feat/queue_event_extractor.py +139 -70
  18. dcnum/logic/__init__.py +5 -0
  19. dcnum/logic/ctrl.py +794 -0
  20. dcnum/logic/job.py +184 -0
  21. dcnum/logic/json_encoder.py +19 -0
  22. dcnum/meta/__init__.py +1 -0
  23. dcnum/meta/paths.py +30 -0
  24. dcnum/meta/ppid.py +66 -9
  25. dcnum/read/__init__.py +1 -0
  26. dcnum/read/cache.py +109 -77
  27. dcnum/read/const.py +6 -4
  28. dcnum/read/hdf5_data.py +190 -31
  29. dcnum/read/mapped.py +87 -0
  30. dcnum/segm/__init__.py +6 -15
  31. dcnum/segm/segm_thresh.py +7 -14
  32. dcnum/segm/segm_torch/__init__.py +19 -0
  33. dcnum/segm/segm_torch/segm_torch_base.py +125 -0
  34. dcnum/segm/segm_torch/segm_torch_mpo.py +71 -0
  35. dcnum/segm/segm_torch/segm_torch_sto.py +88 -0
  36. dcnum/segm/segm_torch/torch_model.py +95 -0
  37. dcnum/segm/segm_torch/torch_postproc.py +93 -0
  38. dcnum/segm/segm_torch/torch_preproc.py +114 -0
  39. dcnum/segm/segmenter.py +245 -96
  40. dcnum/segm/segmenter_manager_thread.py +39 -28
  41. dcnum/segm/{segmenter_cpu.py → segmenter_mpo.py} +137 -43
  42. dcnum/segm/segmenter_sto.py +110 -0
  43. dcnum/write/__init__.py +3 -1
  44. dcnum/write/deque_writer_thread.py +15 -5
  45. dcnum/write/queue_collector_thread.py +14 -17
  46. dcnum/write/writer.py +225 -55
  47. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/METADATA +4 -2
  48. dcnum-0.23.1.dist-info/RECORD +55 -0
  49. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/WHEEL +1 -1
  50. dcnum/feat/feat_moments/__init__.py +0 -3
  51. dcnum/segm/segmenter_gpu.py +0 -45
  52. dcnum-0.13.2.dist-info/RECORD +0 -40
  53. /dcnum/feat/{feat_moments/ct_opencv.py → feat_contour/contour.py} +0 -0
  54. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/LICENSE +0 -0
  55. {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/top_level.txt +0 -0
@@ -2,8 +2,10 @@ import abc
2
2
  import multiprocessing as mp
3
3
  import time
4
4
  import threading
5
+ from typing import Dict
5
6
 
6
7
  import numpy as np
8
+ import scipy.ndimage as ndi
7
9
 
8
10
  from .segmenter import Segmenter
9
11
 
@@ -13,14 +15,41 @@ from .segmenter import Segmenter
13
15
  mp_spawn = mp.get_context('spawn')
14
16
 
15
17
 
16
- class CPUSegmenter(Segmenter, abc.ABC):
17
- def __init__(self, num_workers=None, *args, **kwargs):
18
- super(CPUSegmenter, self).__init__(*args, **kwargs)
18
+ class MPOSegmenter(Segmenter, abc.ABC):
19
+ hardware_processor = "cpu"
20
+
21
+ def __init__(self,
22
+ *,
23
+ num_workers: int = None,
24
+ kwargs_mask: Dict = None,
25
+ debug: bool = False,
26
+ **kwargs):
27
+ """Segmenter with multiprocessing operation
28
+
29
+ Parameters
30
+ ----------
31
+ kwargs_mask: dict
32
+ Keyword arguments for mask post-processing (see `process_mask`)
33
+ debug: bool
34
+ Debugging parameters
35
+ kwargs:
36
+ Additional, optional keyword arguments for `segment_algorithm`
37
+ defined in the subclass.
38
+ """
39
+ super(MPOSegmenter, self).__init__(kwargs_mask=kwargs_mask,
40
+ debug=debug,
41
+ **kwargs)
19
42
  self.num_workers = num_workers or mp.cpu_count()
43
+ # batch input image data
20
44
  self.mp_image_raw = None
21
45
  self._mp_image_np = None
46
+ # batch output image data
22
47
  self.mp_labels_raw = None
23
48
  self._mp_labels_np = None
49
+ # batch image background offset
50
+ self.mp_bg_off_raw = None
51
+ self._mp_bg_off_np = None
52
+ # workers
24
53
  self._mp_workers = []
25
54
  # Image shape of the input array
26
55
  self.image_shape = None
@@ -56,6 +85,7 @@ class CPUSegmenter(Segmenter, abc.ABC):
56
85
  del state["logger"]
57
86
  del state["_mp_image_np"]
58
87
  del state["_mp_labels_np"]
88
+ del state["_mp_bg_off_np"]
59
89
  del state["_mp_workers"]
60
90
  return state
61
91
 
@@ -64,26 +94,26 @@ class CPUSegmenter(Segmenter, abc.ABC):
64
94
  self.__dict__.update(state)
65
95
 
66
96
  @staticmethod
67
- def _create_shared_array(image_shape, batch_size, dtype):
97
+ def _create_shared_array(array_shape, batch_size, dtype):
68
98
  """Return raw and numpy-view on shared array
69
99
 
70
100
  Parameters
71
101
  ----------
72
- image_shape: tuple of int
102
+ array_shape: tuple of int
73
103
  Shape of one single image in the array
74
104
  batch_size: int
75
105
  Number of images in the array
76
106
  dtype:
77
- ctype, e.g. `np.ctypeslib.ctypes.c_uint8`
78
- or `np.ctypeslib.ctypes.c_bool`
107
+ numpy dtype
79
108
  """
80
- sx, sy = image_shape
81
- sa_raw = mp_spawn.RawArray(dtype, int(sx * sy * batch_size))
109
+ ctype = np.ctypeslib.as_ctypes_type(dtype)
110
+ sa_raw = mp_spawn.RawArray(ctype,
111
+ int(np.prod(array_shape) * batch_size))
82
112
  # Convert the RawArray to something we can write to fast
83
113
  # (similar to memory view, but without having to cast) using
84
114
  # np.ctypeslib.as_array. See discussion in
85
115
  # https://stackoverflow.com/questions/37705974
86
- sa_np = np.ctypeslib.as_array(sa_raw).reshape(batch_size, sx, sy)
116
+ sa_np = np.ctypeslib.as_array(sa_raw).reshape(batch_size, *array_shape)
87
117
  return sa_raw, sa_np
88
118
 
89
119
  @property
@@ -105,39 +135,49 @@ class CPUSegmenter(Segmenter, abc.ABC):
105
135
  [w.join() for w in self._mp_workers]
106
136
 
107
137
  def segment_batch(self,
108
- image_data: np.ndarray,
138
+ images: np.ndarray,
109
139
  start: int = None,
110
- stop: int = None):
111
- """Perform batch segmentation of `image_data`
140
+ stop: int = None,
141
+ bg_off: np.ndarray = None,
142
+ ):
143
+ """Perform batch segmentation of `images`
144
+
145
+ Before segmentation, an optional background offset correction with
146
+ `bg_off` is performed. After segmentation, mask postprocessing is
147
+ performed according to the class definition.
112
148
 
113
149
  Parameters
114
150
  ----------
115
- image_data: 3d np.ndarray
151
+ images: 3d np.ndarray
116
152
  The time-series image data. First axis is time.
117
153
  start: int
118
- First index to analyze in `image_data`
154
+ First index to analyze in `images`
119
155
  stop: int
120
- Index after the last index to analyze in `image_data`
156
+ Index after the last index to analyze in `images`
157
+ bg_off: 1D np.ndarray
158
+ Optional 1D numpy array with background offset
121
159
 
122
160
  Notes
123
161
  -----
124
162
  - If the segmentation algorithm only accepts background-corrected
125
- images, then `image_data` must already be background-corrected.
163
+ images, then `images` must already be background-corrected,
164
+ except for the optional `bg_off`.
126
165
  """
127
166
  if stop is None or start is None:
128
167
  start = 0
129
- stop = len(image_data)
168
+ stop = len(images)
130
169
 
131
170
  batch_size = stop - start
132
- size = np.prod(image_data.shape[1:]) * batch_size
171
+ size = np.prod(images.shape[1:]) * batch_size
133
172
 
134
173
  if self.image_shape is None:
135
- self.image_shape = image_data[0].shape
174
+ self.image_shape = images[0].shape
136
175
 
137
176
  if self._mp_image_np is not None and self._mp_image_np.size != size:
138
177
  # reset image data
139
178
  self._mp_image_np = None
140
179
  self._mp_labels_np = None
180
+ self._mp_bg_off_np = None
141
181
  # TODO: If only the batch_size changes, don't
142
182
  # reinitialize the workers. Otherwise, the final rest of
143
183
  # analyzing a dataset would always take a little longer.
@@ -146,30 +186,48 @@ class CPUSegmenter(Segmenter, abc.ABC):
146
186
  self.mp_batch_index.value = -1
147
187
  self.mp_shutdown.value = 0
148
188
 
189
+ if bg_off is not None:
190
+ if not self.requires_background_correction:
191
+ raise ValueError(f"The segmenter {self.__class__.__name__} "
192
+ f"does not employ background correction, "
193
+ f"but the `bg_off` keyword argument was "
194
+ f"passed to `segment_chunk`. Please check "
195
+ f"your analysis pipeline.")
196
+ # background offset
197
+ if self._mp_bg_off_np is None:
198
+ self.mp_bg_off_raw, self._mp_bg_off_np = \
199
+ self._create_shared_array(
200
+ array_shape=(stop - start,),
201
+ batch_size=batch_size,
202
+ dtype=np.float64)
203
+ self._mp_bg_off_np[:] = bg_off[start:stop]
204
+
205
+ # input images
149
206
  if self._mp_image_np is None:
150
207
  self.mp_image_raw, self._mp_image_np = self._create_shared_array(
151
- image_shape=self.image_shape,
208
+ array_shape=self.image_shape,
152
209
  batch_size=batch_size,
153
- dtype=np.ctypeslib.ctypes.c_int32,
210
+ dtype=images.dtype,
154
211
  )
212
+ self._mp_image_np[:] = images[start:stop]
155
213
 
214
+ # output labels
156
215
  if self._mp_labels_np is None:
157
216
  self.mp_labels_raw, self._mp_labels_np = self._create_shared_array(
158
- image_shape=self.image_shape,
217
+ array_shape=self.image_shape,
159
218
  batch_size=batch_size,
160
- dtype=np.ctypeslib.ctypes.c_uint16,
219
+ dtype=np.uint16,
161
220
  )
162
221
 
163
- # populate image data
164
- self._mp_image_np[:] = image_data[start:stop]
165
-
166
222
  # Create the workers
167
223
  if self.debug:
168
- worker_cls = CPUSegmenterWorkerThread
224
+ worker_cls = MPOSegmenterWorkerThread
169
225
  num_workers = 1
226
+ self.logger.debug("Running with one worker in main thread")
170
227
  else:
171
- worker_cls = CPUSegmenterWorkerProcess
172
- num_workers = min(self.num_workers, image_data.shape[0])
228
+ worker_cls = MPOSegmenterWorkerProcess
229
+ num_workers = min(self.num_workers, images.shape[0])
230
+ self.logger.debug(f"Running with {num_workers} workers")
173
231
 
174
232
  if not self._mp_workers:
175
233
  step_size = batch_size // num_workers
@@ -200,8 +258,33 @@ class CPUSegmenter(Segmenter, abc.ABC):
200
258
 
201
259
  return self._mp_labels_np
202
260
 
261
+ def segment_single(self, image, bg_off: float = None):
262
+ """Return the integer label image for an input image
263
+
264
+ Before segmentation, an optional background offset correction with
265
+ `bg_off` is performed. After segmentation, mask postprocessing is
266
+ performed according to the class definition.
267
+ """
268
+ segm_wrap = self.segment_algorithm_wrapper()
269
+ # optional subtraction of background offset
270
+ if bg_off is not None:
271
+ image = image - bg_off
272
+ # obtain mask or label
273
+ mol = segm_wrap(image)
274
+ if mol.dtype == bool:
275
+ # convert mask to labels
276
+ labels, _ = ndi.label(
277
+ input=mol,
278
+ structure=ndi.generate_binary_structure(2, 2))
279
+ else:
280
+ labels = mol
281
+ # optional mask/label postprocessing
282
+ if self.mask_postprocessing:
283
+ labels = self.process_mask(labels, **self.kwargs_mask)
284
+ return labels
203
285
 
204
- class CPUSegmenterWorker:
286
+
287
+ class MPOSegmenterWorker:
205
288
  def __init__(self,
206
289
  segmenter,
207
290
  sl_start: int,
@@ -211,7 +294,7 @@ class CPUSegmenterWorker:
211
294
 
212
295
  Parameters
213
296
  ----------
214
- segmenter: CPUSegmenter
297
+ segmenter: MPOSegmenter
215
298
  The segmentation instance
216
299
  sl_start: int
217
300
  Start of slice of input array to process
@@ -219,7 +302,7 @@ class CPUSegmenterWorker:
219
302
  Stop of slice of input array to process
220
303
  """
221
304
  # Must call super init, otherwise Thread or Process are not initialized
222
- super(CPUSegmenterWorker, self).__init__()
305
+ super(MPOSegmenterWorker, self).__init__()
223
306
  self.segmenter = segmenter
224
307
  # Value incrementing the batch index. Starts with 0 and is
225
308
  # incremented every time :func:`Segmenter.segment_batch` is
@@ -231,8 +314,10 @@ class CPUSegmenterWorker:
231
314
  # Shutdown bit tells workers to stop when set to != 0
232
315
  self.shutdown = segmenter.mp_shutdown
233
316
  # The image data for segmentation
234
- self.image_data_raw = segmenter.mp_image_raw
235
- # Boolean mask array
317
+ self.image_arr_raw = segmenter.mp_image_raw
318
+ # Background data offset
319
+ self.bg_off = segmenter.mp_bg_off_raw
320
+ # Integer output label array
236
321
  self.labels_data_raw = segmenter.mp_labels_raw
237
322
  # The shape of one image
238
323
  self.image_shape = segmenter.image_shape
@@ -244,10 +329,14 @@ class CPUSegmenterWorker:
244
329
  # We have to create the numpy-versions of the mp.RawArrays here,
245
330
  # otherwise we only get some kind of copy in the new process
246
331
  # when we use "spawn" instead of "fork".
247
- labels_data = np.ctypeslib.as_array(self.labels_data_raw).reshape(
332
+ labels_arr = np.ctypeslib.as_array(self.labels_data_raw).reshape(
248
333
  -1, self.image_shape[0], self.image_shape[1])
249
- image_data = np.ctypeslib.as_array(self.image_data_raw).reshape(
334
+ image_arr = np.ctypeslib.as_array(self.image_arr_raw).reshape(
250
335
  -1, self.image_shape[0], self.image_shape[1])
336
+ if self.bg_off is not None:
337
+ bg_off_data = np.ctypeslib.as_array(self.bg_off)
338
+ else:
339
+ bg_off_data = None
251
340
 
252
341
  idx = self.sl_start
253
342
  itr = 0 # current iteration (incremented when we reach self.sl_stop)
@@ -261,8 +350,13 @@ class CPUSegmenterWorker:
261
350
  with self.batch_worker:
262
351
  self.batch_worker.value += 1
263
352
  else:
264
- labels_data[idx, :, :] = self.segmenter.segment_frame(
265
- image_data[idx])
353
+ if bg_off_data is None:
354
+ bg_off = None
355
+ else:
356
+ bg_off = bg_off_data[idx]
357
+
358
+ labels_arr[idx, :, :] = self.segmenter.segment_single(
359
+ image=image_arr[idx], bg_off=bg_off)
266
360
  idx += 1
267
361
  elif self.shutdown.value:
268
362
  break
@@ -271,11 +365,11 @@ class CPUSegmenterWorker:
271
365
  time.sleep(.01)
272
366
 
273
367
 
274
- class CPUSegmenterWorkerProcess(CPUSegmenterWorker, mp_spawn.Process):
368
+ class MPOSegmenterWorkerProcess(MPOSegmenterWorker, mp_spawn.Process):
275
369
  def __init__(self, *args, **kwargs):
276
- super(CPUSegmenterWorkerProcess, self).__init__(*args, **kwargs)
370
+ super(MPOSegmenterWorkerProcess, self).__init__(*args, **kwargs)
277
371
 
278
372
 
279
- class CPUSegmenterWorkerThread(CPUSegmenterWorker, threading.Thread):
373
+ class MPOSegmenterWorkerThread(MPOSegmenterWorker, threading.Thread):
280
374
  def __init__(self, *args, **kwargs):
281
- super(CPUSegmenterWorkerThread, self).__init__(*args, **kwargs)
375
+ super(MPOSegmenterWorkerThread, self).__init__(*args, **kwargs)
@@ -0,0 +1,110 @@
1
+ import abc
2
+ from typing import Dict
3
+
4
+ import numpy as np
5
+ import scipy.ndimage as ndi
6
+
7
+
8
+ from .segmenter import Segmenter
9
+
10
+
11
+ class STOSegmenter(Segmenter, abc.ABC):
12
+ hardware_processor = "gpu"
13
+
14
+ def __init__(self,
15
+ *,
16
+ num_workers: int = None,
17
+ kwargs_mask: Dict = None,
18
+ debug: bool = False,
19
+ **kwargs
20
+ ):
21
+ """Segmenter with single thread operation
22
+
23
+ Parameters
24
+ ----------
25
+ kwargs_mask: dict
26
+ Keyword arguments for mask post-processing (see `process_mask`)
27
+ debug: bool
28
+ Debugging parameters
29
+ kwargs:
30
+ Additional, optional keyword arguments for `segment_algorithm`
31
+ defined in the subclass.
32
+ """
33
+ if num_workers not in [None, 1]:
34
+ raise ValueError(f"Number of workers must not be larger than 1 "
35
+ f"for GPU segmenter, got '{num_workers}'!")
36
+ super(STOSegmenter, self).__init__(kwargs_mask=kwargs_mask,
37
+ debug=debug,
38
+ **kwargs)
39
+
40
+ def segment_batch(self,
41
+ images: np.ndarray,
42
+ start: int = None,
43
+ stop: int = None,
44
+ bg_off: np.ndarray = None,
45
+ ):
46
+ """Perform batch segmentation of `images`
47
+
48
+ Before segmentation, an optional background offset correction with
49
+ `bg_off` is performed. After segmentation, mask postprocessing is
50
+ performed according to the class definition.
51
+
52
+ Parameters
53
+ ----------
54
+ images: 3d np.ndarray
55
+ The time-series image data. First axis is time.
56
+ start: int
57
+ First index to analyze in `images`
58
+ stop: int
59
+ Index after the last index to analyze in `images`
60
+ bg_off: 1D np.ndarray
61
+ Optional 1D numpy array with background offset
62
+
63
+ Notes
64
+ -----
65
+ - If the segmentation algorithm only accepts background-corrected
66
+ images, then `images` must already be background-corrected,
67
+ except for the optional `bg_off`.
68
+ """
69
+ if stop is None or start is None:
70
+ start = 0
71
+ stop = len(images)
72
+
73
+ image_slice = images[start:stop]
74
+ segm = self.segment_algorithm_wrapper()
75
+
76
+ if bg_off is not None:
77
+ if not self.requires_background_correction:
78
+ raise ValueError(f"The segmenter {self.__class__.__name__} "
79
+ f"does not employ background correction, "
80
+ f"but the `bg_off` keyword argument was "
81
+ f"passed to `segment_chunk`. Please check "
82
+ f"your analysis pipeline.")
83
+ image_slice = image_slice - bg_off.reshape(-1, 1, 1)
84
+ labels = segm(image_slice)
85
+
86
+ # Make sure we have integer labels and perform mask postprocessing
87
+ if labels.dtype == bool:
88
+ new_labels = np.zeros_like(labels, dtype=np.uint16)
89
+ for ii in range(len(labels)):
90
+ ndi.label(
91
+ input=labels[ii],
92
+ output=new_labels[ii],
93
+ structure=ndi.generate_binary_structure(2, 2))
94
+ labels = new_labels
95
+
96
+ # Perform mask postprocessing
97
+ if self.mask_postprocessing:
98
+ for ii in range(len(labels)):
99
+ labels[ii] = self.process_mask(labels[ii], **self.kwargs_mask)
100
+
101
+ return labels
102
+
103
+ def segment_single(self, image, bg_off: float = None):
104
+ """This is a convenience-wrapper around `segment_batch`"""
105
+ if bg_off is None:
106
+ bg_off_batch = None
107
+ else:
108
+ bg_off_batch = np.atleast_1d(bg_off)
109
+ images = image[np.newaxis]
110
+ return self.segment_batch(images, bg_off=bg_off_batch)[0]
dcnum/write/__init__.py CHANGED
@@ -1,4 +1,6 @@
1
1
  # flake8: noqa: F401
2
2
  from .deque_writer_thread import DequeWriterThread
3
3
  from .queue_collector_thread import EventStash, QueueCollectorThread
4
- from .writer import HDF5Writer, copy_metadata, create_with_basins
4
+ from .writer import (
5
+ HDF5Writer, copy_features, copy_metadata, create_with_basins,
6
+ set_default_filter_kwargs)
@@ -1,14 +1,17 @@
1
1
  import collections
2
+ import logging
2
3
  import pathlib
3
4
  import threading
4
5
  import time
5
6
 
7
+ import h5py
8
+
6
9
  from .writer import HDF5Writer
7
10
 
8
11
 
9
12
  class DequeWriterThread(threading.Thread):
10
13
  def __init__(self,
11
- path_out: pathlib.Path,
14
+ path_out: pathlib.Path | h5py.File,
12
15
  dq: collections.deque,
13
16
  ds_kwds: dict = None,
14
17
  mode: str = "a",
@@ -19,11 +22,12 @@ class DequeWriterThread(threading.Thread):
19
22
  ----------
20
23
  path_out:
21
24
  Path to the output HDF5 file
22
- dq:
25
+ dq: collections.deque
23
26
  `collections.deque` object from which data are taken
24
27
  using `popleft()`.
25
28
  """
26
29
  super(DequeWriterThread, self).__init__(*args, **kwargs)
30
+ self.logger = logging.getLogger("dcnum.write.DequeWriterThread")
27
31
  if mode == "w":
28
32
  path_out.unlink(missing_ok=True)
29
33
  self.writer = HDF5Writer(path_out, mode=mode, ds_kwds=ds_kwds)
@@ -40,15 +44,21 @@ class DequeWriterThread(threading.Thread):
40
44
  self.may_stop_loop = True
41
45
 
42
46
  def run(self):
47
+ time_tot = 0
43
48
  while True:
49
+ ldq = len(self.dq)
44
50
  if self.must_stop_loop:
45
51
  break
46
- elif len(self.dq):
47
- feat, data = self.dq.popleft()
48
- self.writer.store_feature_chunk(feat=feat, data=data)
52
+ elif ldq:
53
+ t0 = time.perf_counter()
54
+ for _ in range(ldq):
55
+ feat, data = self.dq.popleft()
56
+ self.writer.store_feature_chunk(feat=feat, data=data)
57
+ time_tot += time.perf_counter() - t0
49
58
  elif self.may_stop_loop:
50
59
  break
51
60
  else:
52
61
  # wait for the next item to arrive
53
62
  time.sleep(.1)
63
+ self.logger.info(f"Disk time: {time_tot:.1f}s")
54
64
  self.writer.close()
@@ -171,7 +171,7 @@ class QueueCollectorThread(threading.Thread):
171
171
  self.event_queue.cancel_join_thread()
172
172
  # Indexes the current frame in `self.data`.
173
173
  last_idx = 0
174
- self.logger.debug("Started collector thread.")
174
+ self.logger.debug("Started collector thread")
175
175
  while True:
176
176
  # Slice of the shared nevents array. If it contains -1 values,
177
177
  # this means that some of the frames have not yet been processed.
@@ -184,10 +184,10 @@ class QueueCollectorThread(threading.Thread):
184
184
 
185
185
  if len(cur_nevents) == 0:
186
186
  self.logger.info(
187
- "Reached the end of the current dataset (frame "
187
+ "Reached dataset end (frame "
188
188
  # `last_idx` is the size of the dataset in the end,
189
189
  # because `len(cur_nevents)` is always added to it.
190
- f"{last_idx} of {len(self.feat_nevents)}).")
190
+ f"{last_idx} of {len(self.feat_nevents)})")
191
191
  break
192
192
 
193
193
  # We have reached the writer threshold. This means the extractor
@@ -245,20 +245,14 @@ class QueueCollectorThread(threading.Thread):
245
245
  # the events that we just saved.
246
246
  indices = stash.indices_for_data
247
247
 
248
- # Write all the scalar features.
249
- for feat in self.data.features_scalar_frame:
250
- self.writer_dq.append((feat, self.data[feat][indices]))
251
-
252
- # Write the image and background data.
253
- imdat = np.zeros((stash.size,) + self.data.image.image_shape,
254
- dtype=np.uint8)
255
- bgdat = np.zeros((stash.size,) + self.data.image.image_shape,
256
- dtype=np.uint8)
257
- for ii, idx in enumerate(indices):
258
- imdat[ii] = self.data.image[idx]
259
- bgdat[ii] = self.data.image_bg[idx]
260
- self.writer_dq.append(("image", imdat))
261
- self.writer_dq.append(("image_bg", bgdat))
248
+ # This is the unmapped index from the input HDF5Data instance.
249
+ # Unmapped means that this only enumerates HDF5Data, but since
250
+ # HDF5Data can be mapped, the index does not necessarily enumerate
251
+ # the underlying HDF5 file. Later on, we will have to convert this
252
+ # to the correct "basinmap0" feature
253
+ # (see `DCNumJobRunner.task_enforce_basin_strategy`)
254
+ self.writer_dq.append(("index_unmapped",
255
+ np.array(indices, dtype=np.uint32)))
262
256
 
263
257
  # Write the number of events.
264
258
  self.writer_dq.append(("nevents",
@@ -273,3 +267,6 @@ class QueueCollectorThread(threading.Thread):
273
267
 
274
268
  # Increment current frame index.
275
269
  last_idx += len(cur_nevents)
270
+
271
+ self.logger.info(f"Counted {self.written_events} events")
272
+ self.logger.debug(f"Counted {self.written_frames} frames")