dcnum 0.16.1__tar.gz → 0.16.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dcnum might be problematic. Click here for more details.

Files changed (91) hide show
  1. {dcnum-0.16.1 → dcnum-0.16.3}/CHANGELOG +20 -0
  2. {dcnum-0.16.1/src/dcnum.egg-info → dcnum-0.16.3}/PKG-INFO +1 -1
  3. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/_version.py +2 -2
  4. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/event_extractor_manager_thread.py +21 -5
  5. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_background/base.py +22 -13
  6. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_background/bg_roll_median.py +8 -15
  7. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_background/bg_sparse_median.py +27 -27
  8. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/queue_event_extractor.py +41 -31
  9. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/logic/ctrl.py +171 -49
  10. dcnum-0.16.3/src/dcnum/logic/json_encoder.py +17 -0
  11. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/read/cache.py +1 -1
  12. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/read/hdf5_data.py +1 -1
  13. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/segm/segmenter_gpu.py +4 -0
  14. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/segm/segmenter_manager_thread.py +4 -1
  15. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/write/__init__.py +2 -1
  16. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/write/queue_collector_thread.py +4 -1
  17. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/write/writer.py +46 -40
  18. {dcnum-0.16.1 → dcnum-0.16.3/src/dcnum.egg-info}/PKG-INFO +1 -1
  19. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum.egg-info/SOURCES.txt +1 -0
  20. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_feat_background_base.py +2 -0
  21. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_feat_background_bg_roll_median.py +2 -0
  22. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_feat_background_bg_sparsemed.py +2 -0
  23. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_logic_pipeline.py +35 -14
  24. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_write_writer.py +16 -0
  25. {dcnum-0.16.1 → dcnum-0.16.3}/.github/workflows/check.yml +0 -0
  26. {dcnum-0.16.1 → dcnum-0.16.3}/.github/workflows/deploy_pypi.yml +0 -0
  27. {dcnum-0.16.1 → dcnum-0.16.3}/.gitignore +0 -0
  28. {dcnum-0.16.1 → dcnum-0.16.3}/.readthedocs.yml +0 -0
  29. {dcnum-0.16.1 → dcnum-0.16.3}/LICENSE +0 -0
  30. {dcnum-0.16.1 → dcnum-0.16.3}/README.rst +0 -0
  31. {dcnum-0.16.1 → dcnum-0.16.3}/docs/conf.py +0 -0
  32. {dcnum-0.16.1 → dcnum-0.16.3}/docs/extensions/github_changelog.py +0 -0
  33. {dcnum-0.16.1 → dcnum-0.16.3}/docs/index.rst +0 -0
  34. {dcnum-0.16.1 → dcnum-0.16.3}/docs/requirements.txt +0 -0
  35. {dcnum-0.16.1 → dcnum-0.16.3}/pyproject.toml +0 -0
  36. {dcnum-0.16.1 → dcnum-0.16.3}/setup.cfg +0 -0
  37. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/__init__.py +0 -0
  38. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/__init__.py +0 -0
  39. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_background/__init__.py +0 -0
  40. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_background/bg_copy.py +0 -0
  41. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_brightness/__init__.py +0 -0
  42. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_brightness/bright_all.py +0 -0
  43. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_brightness/common.py +0 -0
  44. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_moments/__init__.py +0 -0
  45. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_moments/ct_opencv.py +0 -0
  46. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_moments/mt_legacy.py +0 -0
  47. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_texture/__init__.py +0 -0
  48. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_texture/common.py +0 -0
  49. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/feat_texture/tex_all.py +0 -0
  50. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/feat/gate.py +0 -0
  51. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/logic/__init__.py +0 -0
  52. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/logic/job.py +0 -0
  53. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/meta/__init__.py +0 -0
  54. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/meta/ppid.py +0 -0
  55. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/read/__init__.py +0 -0
  56. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/read/const.py +0 -0
  57. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/segm/__init__.py +0 -0
  58. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/segm/segm_thresh.py +0 -0
  59. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/segm/segmenter.py +0 -0
  60. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/segm/segmenter_cpu.py +0 -0
  61. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum/write/deque_writer_thread.py +0 -0
  62. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum.egg-info/dependency_links.txt +0 -0
  63. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum.egg-info/requires.txt +0 -0
  64. {dcnum-0.16.1 → dcnum-0.16.3}/src/dcnum.egg-info/top_level.txt +0 -0
  65. {dcnum-0.16.1 → dcnum-0.16.3}/tests/conftest.py +0 -0
  66. {dcnum-0.16.1 → dcnum-0.16.3}/tests/data/fmt-hdf5_cytoshot_extended-moments-features.zip +0 -0
  67. {dcnum-0.16.1 → dcnum-0.16.3}/tests/data/fmt-hdf5_cytoshot_full-features_2023.zip +0 -0
  68. {dcnum-0.16.1 → dcnum-0.16.3}/tests/data/fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip +0 -0
  69. {dcnum-0.16.1 → dcnum-0.16.3}/tests/data/fmt-hdf5_shapein_raw-with-variable-length-logs.zip +0 -0
  70. {dcnum-0.16.1 → dcnum-0.16.3}/tests/helper_methods.py +0 -0
  71. {dcnum-0.16.1 → dcnum-0.16.3}/tests/requirements.txt +0 -0
  72. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_feat_brightness.py +0 -0
  73. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_feat_haralick.py +0 -0
  74. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_feat_moments_based.py +0 -0
  75. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_feat_moments_based_extended.py +0 -0
  76. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_init.py +0 -0
  77. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_logic_job.py +0 -0
  78. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_logic_join.py +0 -0
  79. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_ppid.py +0 -0
  80. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_ppid_bg.py +0 -0
  81. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_ppid_data.py +0 -0
  82. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_ppid_feat.py +0 -0
  83. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_ppid_gate.py +0 -0
  84. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_ppid_segm.py +0 -0
  85. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_read_basin.py +0 -0
  86. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_read_concat_hdf5.py +0 -0
  87. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_read_hdf5.py +0 -0
  88. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_segm_thresh.py +0 -0
  89. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_segmenter.py +0 -0
  90. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_write_deque_writer_thread.py +0 -0
  91. {dcnum-0.16.1 → dcnum-0.16.3}/tests/test_write_queue_collector_thread.py +0 -0
@@ -1,3 +1,23 @@
1
+ 0.16.3
2
+ - enh: define valid DCNumJobRunner state
3
+ - enh: more robust computation of progress
4
+ - enh: use HDF5Data when loading input data for background computation
5
+ - enh: automatically split segmenters and extractors equally
6
+ - ref: reduce default image cache size from 5 to 2
7
+ - ref: move dataset generation default kwargs to writer submodule
8
+ - ref: warn above 0.5% of discarded events in EventExtractorManagerThread
9
+ 0.16.2
10
+ - fix: ignore empty HDF5 datasets when copying metadata
11
+ - fix: logging from subprocesses did not work as expected
12
+ - enh: warn user about total number of invalid masks
13
+ - enh: introduce DCNumJobRunner.error_tb for errors happening in threads
14
+ - enh: improve logging verbosity
15
+ - enh: append job information as log entry in DCNumJobRunner output file
16
+ - enh: set chunk size for all feature data to 1MiB in HDF5Writer
17
+ - ref: removed close_queues argument from EventExtractor init
18
+ - ref: rename event_count with image_count in background computation
19
+ - ref: do not print anything to stdout when computing background data
20
+ - ref: use data from background computer in DCNumJobRunner.get_status
1
21
  0.16.1
2
22
  - fix: when checking for ppid kwargs, allow kwargs defined in `__init__`
3
23
  - ref: use kwonly arguments for segmenter `__init__` method
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: dcnum
3
- Version: 0.16.1
3
+ Version: 0.16.3
4
4
  Summary: numerics toolbox for imaging deformability cytometry
5
5
  Author: Paul Müller
6
6
  Maintainer-email: Paul Müller <dev@craban.de>
@@ -12,5 +12,5 @@ __version__: str
12
12
  __version_tuple__: VERSION_TUPLE
13
13
  version_tuple: VERSION_TUPLE
14
14
 
15
- __version__ = version = '0.16.1'
16
- __version_tuple__ = version_tuple = (0, 16, 1)
15
+ __version__ = version = '0.16.3'
16
+ __version_tuple__ = version_tuple = (0, 16, 3)
@@ -46,8 +46,6 @@ class EventExtractorManagerThread(threading.Thread):
46
46
  """
47
47
  super(EventExtractorManagerThread, self).__init__(
48
48
  name="EventExtractorManager", *args, **kwargs)
49
- if debug:
50
- fe_kwargs["close_queues"] = False
51
49
  self.logger = logging.getLogger(
52
50
  "dcnum.feat.EventExtractorManagerThread")
53
51
  #: Keyword arguments for class:`.EventExtractor`
@@ -83,9 +81,9 @@ class EventExtractorManagerThread(threading.Thread):
83
81
  for _ in range(self.num_workers)]
84
82
  [w.start() for w in workers]
85
83
 
84
+ num_slots = len(self.slot_states)
86
85
  chunks_processed = 0
87
86
  while True:
88
- num_slots = len(self.slot_states)
89
87
  cur_slot = 0
90
88
  unavailable_slots = 0
91
89
  # Check all slots for segmented labels
@@ -95,8 +93,10 @@ class EventExtractorManagerThread(threading.Thread):
95
93
  # - "s" the extractor processed the data and is waiting
96
94
  # for the segmenter
97
95
  if self.slot_states[cur_slot] == "e":
96
+ # The segmenter has something for us in this slot.
98
97
  break
99
98
  else:
99
+ # Try another slot.
100
100
  unavailable_slots += 1
101
101
  cur_slot = (cur_slot + 1) % num_slots
102
102
  if unavailable_slots >= num_slots:
@@ -136,15 +136,31 @@ class EventExtractorManagerThread(threading.Thread):
136
136
  if chunks_processed == self.data.image.num_chunks:
137
137
  break
138
138
 
139
- self.logger.debug("Waiting for event_queue to empty.")
140
139
  # Wait until the event queue is empty.
140
+ self.logger.debug("Waiting for event_queue to empty.")
141
141
  event_queue = self.fe_kwargs["event_queue"]
142
142
  while not event_queue.empty():
143
143
  # The collector thread is still sorting things out. Wait
144
144
  # before joining the threads.
145
- time.sleep(.1)
145
+ time.sleep(.05)
146
+
147
+ # Wait until log queue is empty
148
+ self.logger.debug("Waiting for log_queue to empty.")
149
+ log_queue = self.fe_kwargs["log_queue"]
150
+ while not log_queue.empty():
151
+ time.sleep(.05)
152
+
153
+ inv_masks = self.fe_kwargs["invalid_mask_counter"].value
154
+ if inv_masks:
155
+ self.logger.info(f"Encountered {inv_masks} invalid masks.")
156
+ inv_frac = inv_masks / len(self.data)
157
+ if inv_frac > 0.005: # warn above one half percent
158
+ self.logger.warning(f"Discarded {inv_frac:.1%} of the masks. "
159
+ f"Please check segmenter applicability.")
160
+
146
161
  self.logger.debug("Requesting extraction workers to join.")
147
162
  self.fe_kwargs["finalize_extraction"].value = True
148
163
  [w.join() for w in workers]
164
+
149
165
  self.logger.debug("Finished extraction.")
150
166
  self.logger.info(f"Extraction time: {self.t_count:.1f}s")
@@ -7,12 +7,16 @@ import uuid
7
7
  import warnings
8
8
 
9
9
  import h5py
10
- import hdf5plugin
11
10
  import numpy as np
12
11
 
13
12
  from ...meta import ppid
14
13
  from ...read import HDF5Data
15
- from ...write import create_with_basins
14
+ from ...write import create_with_basins, set_default_filter_kwargs
15
+
16
+
17
+ # All subprocesses should use 'spawn' to avoid issues with threads
18
+ # and 'fork' on POSIX systems.
19
+ mp_spawn = mp.get_context('spawn')
16
20
 
17
21
 
18
22
  class Background(abc.ABC):
@@ -55,12 +59,14 @@ class Background(abc.ABC):
55
59
  self.kwargs.update(kwargs)
56
60
 
57
61
  if num_cpus is None:
58
- num_cpus = mp.cpu_count()
62
+ num_cpus = mp_spawn.cpu_count()
59
63
  #: number of CPUs used
60
64
  self.num_cpus = num_cpus
61
65
 
62
- #: number of frames
63
- self.event_count = None
66
+ #: number of images in the input data
67
+ self.image_count = None
68
+ #: number of images that have been processed
69
+ self.image_proc = mp_spawn.Value("L", 0)
64
70
 
65
71
  #: HDF5Data instance for input data
66
72
  self.hdin = None
@@ -93,7 +99,7 @@ class Background(abc.ABC):
93
99
  #: shape of event images
94
100
  self.image_shape = self.input_data[0].shape
95
101
  #: total number of events
96
- self.event_count = len(self.input_data)
102
+ self.image_count = len(self.input_data)
97
103
 
98
104
  if self.h5out is None:
99
105
  if not output_path.exists():
@@ -105,19 +111,15 @@ class Background(abc.ABC):
105
111
  self.h5out = h5py.File(output_path, "a", libver="latest")
106
112
 
107
113
  # Initialize background data
108
- if compress:
109
- compression_kwargs = hdf5plugin.Zstd(clevel=5)
110
- else:
111
- compression_kwargs = {}
114
+ ds_kwargs = set_default_filter_kwargs(compression=compress)
112
115
  h5bg = self.h5out.require_dataset(
113
116
  "events/image_bg",
114
117
  shape=self.input_data.shape,
115
118
  dtype=np.uint8,
116
- chunks=(min(100, self.event_count),
119
+ chunks=(min(100, self.image_count),
117
120
  self.image_shape[0],
118
121
  self.image_shape[1]),
119
- fletcher32=True,
120
- **compression_kwargs,
122
+ **ds_kwargs,
121
123
  )
122
124
  h5bg.attrs.create('CLASS', np.string_('IMAGE'))
123
125
  h5bg.attrs.create('IMAGE_VERSION', np.string_('1.2'))
@@ -191,6 +193,13 @@ class Background(abc.ABC):
191
193
  ppid=pp_check_user_kwargs)
192
194
  return kwargs
193
195
 
196
+ def get_progress(self):
197
+ """Return progress of background computation, float in [0,1]"""
198
+ if self.image_count == 0:
199
+ return 0.
200
+ else:
201
+ return self.image_proc.value / self.image_count
202
+
194
203
  def process(self):
195
204
  self.process_approach()
196
205
  bg_ppid = self.get_ppid()
@@ -1,16 +1,10 @@
1
- import multiprocessing as mp
2
1
  import queue
3
2
  import time
4
3
 
5
4
  import numpy as np
6
5
  from scipy import ndimage
7
6
 
8
- from .base import Background
9
-
10
-
11
- # All subprocesses should use 'spawn' to avoid issues with threads
12
- # and 'fork' on POSIX systems.
13
- mp_spawn = mp.get_context('spawn')
7
+ from .base import mp_spawn, Background
14
8
 
15
9
 
16
10
  class BackgroundRollMed(Background):
@@ -152,9 +146,9 @@ class BackgroundRollMed(Background):
152
146
  stop_in = (batch_index + 1) * self.batch_size + self.kernel_size
153
147
  stop_out = (batch_index + 1) * self.batch_size
154
148
 
155
- if stop_in > self.event_count:
156
- stop_in = self.event_count
157
- stop_out = self.event_count - self.kernel_size
149
+ if stop_in > self.image_count:
150
+ stop_in = self.image_count
151
+ stop_out = self.image_count - self.kernel_size
158
152
 
159
153
  slice_in = slice(start, stop_in)
160
154
  slice_out = slice(start, stop_out)
@@ -175,16 +169,14 @@ class BackgroundRollMed(Background):
175
169
 
176
170
  def process_approach(self):
177
171
  """Perform median computation on entire input data"""
178
- num_steps = int(np.ceil(self.event_count / self.batch_size))
172
+ num_steps = int(np.ceil(self.image_count / self.batch_size))
179
173
  for ii in range(num_steps):
180
- print(f"Computing background {ii/num_steps*100:.0f}%",
181
- end="\r", flush=True)
182
174
  self.process_next_batch()
183
175
  # Set the remaining kernel_size median values to the last one
184
176
  last_image = self.h5out["events/image_bg"][-self.kernel_size-1]
185
177
  for ii in range(self.kernel_size):
186
- self.h5out["events/image_bg"][self.event_count-ii-1] = last_image
187
- print("Computing background 100% ", flush=True)
178
+ self.h5out["events/image_bg"][self.image_count-ii-1] = last_image
179
+ self.image_proc.value = self.image_count
188
180
 
189
181
  def process_next_batch(self):
190
182
  """Process one batch of input data"""
@@ -221,6 +213,7 @@ class BackgroundRollMed(Background):
221
213
  *self.image_shape)
222
214
 
223
215
  self.current_batch += 1
216
+ self.image_proc.value += self.batch_size
224
217
 
225
218
 
226
219
  class MedianWorker(mp_spawn.Process):
@@ -1,19 +1,15 @@
1
1
  import logging
2
- import multiprocessing as mp
3
2
  import queue
4
3
  import time
5
4
 
6
5
  import numpy as np
7
6
  from scipy import ndimage
8
7
 
9
- from .base import Background
10
-
11
- logger = logging.getLogger(__name__)
8
+ from ...read import HDF5Data
12
9
 
10
+ from .base import mp_spawn, Background
13
11
 
14
- # All subprocesses should use 'spawn' to avoid issues with threads
15
- # and 'fork' on POSIX systems.
16
- mp_spawn = mp.get_context('spawn')
12
+ logger = logging.getLogger(__name__)
17
13
 
18
14
 
19
15
  class BackgroundSparseMed(Background):
@@ -96,27 +92,28 @@ class BackgroundSparseMed(Background):
96
92
  # time axis
97
93
  self.time = None
98
94
  if self.h5in is not None:
99
- if "time" in self.h5in["events"]:
95
+ hd = HDF5Data(self.h5in)
96
+ if "time" in hd:
100
97
  # use actual time from dataset
101
- self.time = self.h5in["/events/time"][:]
98
+ self.time = hd["time"][:]
102
99
  self.time -= self.time[0]
103
- elif "imaging:frame rate" in self.h5in.attrs:
104
- fr = self.h5in.attrs["imaging:frame rate"]
105
- if "frame" in self.h5in["/events"]:
100
+ elif "imaging:frame rate" in hd.meta:
101
+ fr = hd.meta["imaging:frame rate"]
102
+ if "frame" in hd:
106
103
  # compute time from frame rate and frame numbers
107
- self.time = self.h5in["/events/frame"] / fr
104
+ self.time = hd["frame"] / fr
108
105
  self.time -= self.time[0]
109
106
  else:
110
107
  # compute time using frame rate (approximate)
111
- dur = self.event_count / fr * 1.5
108
+ dur = self.image_count / fr * 1.5
112
109
  logger.info(f"Approximating duration: {dur/60:.1f}min")
113
- self.time = np.linspace(0, dur, self.event_count,
110
+ self.time = np.linspace(0, dur, self.image_count,
114
111
  endpoint=True)
115
112
  if self.time is None:
116
113
  # No HDF5 file or no information therein; Make an educated guess.
117
- dur = self.event_count / 3600 * 1.5
114
+ dur = self.image_count / 3600 * 1.5
118
115
  logger.info(f"Guessing duration: {dur/60:.1f}min")
119
- self.time = np.linspace(0, dur, self.event_count,
116
+ self.time = np.linspace(0, dur, self.image_count,
120
117
  endpoint=True)
121
118
 
122
119
  #: duration of the measurement
@@ -212,10 +209,7 @@ class BackgroundSparseMed(Background):
212
209
 
213
210
  # Compute initial background images (populates self.bg_images)
214
211
  for ii, ti in enumerate(self.step_times):
215
- print(f"Computing background {ii / self.step_times.size:.0%}",
216
- end="\r", flush=True)
217
212
  self.process_second(ii, ti)
218
- print("Computing background 100% ", flush=True)
219
213
 
220
214
  if self.frac_cleansing != 1:
221
215
  # The following algorithm finds background images that contain
@@ -277,7 +271,7 @@ class BackgroundSparseMed(Background):
277
271
  f"`thresh_cleansing` or `frac_cleansing`. The new "
278
272
  f"threshold is {thresh_fact / thresh}.")
279
273
 
280
- logger.info(f"Removed {frac_remove:.2%} of the background series")
274
+ logger.info(f"Cleansed {frac_remove:.2%}")
281
275
  step_times = self.step_times[used]
282
276
  bg_images = self.bg_images[used]
283
277
  else:
@@ -286,7 +280,7 @@ class BackgroundSparseMed(Background):
286
280
  bg_images = self.bg_images
287
281
 
288
282
  # Assign each frame to a certain background index
289
- bg_idx = np.zeros(self.event_count, dtype=int)
283
+ bg_idx = np.zeros(self.image_count, dtype=int)
290
284
  idx0 = 0
291
285
  idx1 = None
292
286
  for ii in range(len(step_times)):
@@ -298,21 +292,25 @@ class BackgroundSparseMed(Background):
298
292
  # Fill up remainder of index array with last entry
299
293
  bg_idx[idx1:] = ii
300
294
 
295
+ self.image_proc.value = self.image_count
296
+
301
297
  # Write background data
302
298
  pos = 0
303
299
  step = 1000
304
- while pos < self.event_count:
305
- stop = min(pos + step, self.event_count)
300
+ while pos < self.image_count:
301
+ stop = min(pos + step, self.image_count)
306
302
  cur_slice = slice(pos, stop)
307
303
  self.h5out["events/image_bg"][cur_slice] = \
308
304
  bg_images[bg_idx[cur_slice]]
309
305
  pos += step
310
306
 
311
- def process_second(self, ii, second):
307
+ def process_second(self,
308
+ ii: int,
309
+ second: float | int):
312
310
  idx_start = np.argmin(np.abs(second - self.time))
313
311
  idx_stop = idx_start + self.kernel_size
314
- if idx_stop >= self.event_count:
315
- idx_stop = self.event_count
312
+ if idx_stop >= self.image_count:
313
+ idx_stop = self.image_count
316
314
  idx_start = max(0, idx_stop - self.kernel_size)
317
315
  assert idx_stop - idx_start == self.kernel_size
318
316
 
@@ -347,6 +345,8 @@ class BackgroundSparseMed(Background):
347
345
 
348
346
  self.bg_images[ii] = self.shared_output.reshape(self.image_shape)
349
347
 
348
+ self.image_proc.value = idx_stop
349
+
350
350
 
351
351
  class MedianWorkerSingle(mp_spawn.Process):
352
352
  def __init__(self, job_queue, counter, shared_input, shared_output,
@@ -35,7 +35,8 @@ class QueueEventExtractor:
35
35
  feat_nevents: mp.Array,
36
36
  label_array: mp.Array,
37
37
  finalize_extraction: mp.Value,
38
- close_queues: bool = True,
38
+ invalid_mask_counter: mp.Value,
39
+ log_level: int = logging.INFO,
39
40
  extract_kwargs: dict = None,
40
41
  *args, **kwargs):
41
42
  """Base class for event extraction from label images
@@ -66,9 +67,10 @@ class QueueEventExtractor:
66
67
  finalize_extraction:
67
68
  Shared value indicating whether this worker should stop as
68
69
  soon as the `raw_queue` is empty.
69
- close_queues: bool
70
- Whether to close event and logging queues
71
- (set to False in debug mode)
70
+ invalid_mask_counter:
71
+ Counts masks labeled as invalid by the feature extractor
72
+ log_level:
73
+ Logging level to use
72
74
  extract_kwargs:
73
75
  Keyword arguments for the extraction process. See the
74
76
  keyword-only arguments in
@@ -85,7 +87,13 @@ class QueueEventExtractor:
85
87
  self.event_queue = event_queue
86
88
  #: queue for logging
87
89
  self.log_queue = log_queue
88
- self.close_queues = close_queues
90
+ #: invalid mask counter
91
+ self.invalid_mask_counter = invalid_mask_counter
92
+ # Logging needs to be set up after `start` is called, otherwise
93
+ # it looks like we have the same PID as the parent process. We
94
+ # are setting up logging in `run`.
95
+ self.logger = None
96
+ self.log_level = log_level
89
97
  #: Shared array of length `len(data)` into which the number of
90
98
  #: events per frame is written.
91
99
  self.feat_nevents = feat_nevents
@@ -100,15 +108,12 @@ class QueueEventExtractor:
100
108
  extract_kwargs.setdefault("haralick", True)
101
109
  #: Feature extraction keyword arguments.
102
110
  self.extract_kwargs = extract_kwargs
103
- # Logging needs to be set up after `start` is called, otherwise
104
- # it looks like we have the same PID as the parent process. We
105
- # are setting up logging in `run`.
106
- self.logger = None
107
111
 
108
112
  @staticmethod
109
113
  def get_init_kwargs(data: HDF5Data,
110
114
  gate: Gate,
111
115
  log_queue: mp.Queue,
116
+ log_level: int = logging.INFO,
112
117
  preselect: None = None,
113
118
  ptp_median: None = None):
114
119
  """Get initialization arguments for :cass:`.QueueEventExtractor`
@@ -125,7 +130,9 @@ class QueueEventExtractor:
125
130
  gate: HDF5Data
126
131
  Gating class to use
127
132
  log_queue: mp.Queue
128
- Queue for sending log messages
133
+ Queue the worker uses for sending log messages
134
+ log_level: int
135
+ Logging level to use in the worker process
129
136
  preselect, ptp_median:
130
137
  Deprecated
131
138
 
@@ -146,6 +153,7 @@ class QueueEventExtractor:
146
153
  warnings.warn("The `ptp_median` argument is deprecated!",
147
154
  DeprecationWarning)
148
155
 
156
+ # Note that the order must be identical to __init__
149
157
  args = collections.OrderedDict()
150
158
  args["data"] = data
151
159
  args["gate"] = gate
@@ -159,7 +167,8 @@ class QueueEventExtractor:
159
167
  np.ctypeslib.ctypes.c_int16,
160
168
  int(np.prod(data.image.chunk_shape)))
161
169
  args["finalize_extraction"] = mp_spawn.Value("b", False)
162
- args["close_queues"] = True
170
+ args["invalid_mask_counter"] = mp_spawn.Value("L", 0)
171
+ args["log_level"] = log_level
163
172
  return args
164
173
 
165
174
  def get_events_from_masks(self, masks, data_index, *,
@@ -207,8 +216,7 @@ class QueueEventExtractor:
207
216
  # over from gated_events to valid_events. According to our experience
208
217
  # invalid events happen rarely though.
209
218
  if np.any(invalid):
210
- self.logger.info(f"Discarded {np.sum(invalid)} events due to "
211
- "invalid segmentation.")
219
+ self.invalid_mask_counter.value += np.sum(invalid)
212
220
  for key in gated_events:
213
221
  valid_events[key] = gated_events[key][valid]
214
222
  else:
@@ -294,17 +302,27 @@ class QueueEventExtractor:
294
302
  """Main loop of worker process"""
295
303
  # Don't wait for these two queues when joining workers
296
304
  self.raw_queue.cancel_join_thread()
297
- self.log_queue.cancel_join_thread()
298
305
  #: logger sends all logs to `self.log_queue`
299
306
  self.logger = logging.getLogger(
300
307
  f"dcnum.feat.EventExtractor.{os.getpid()}")
308
+ self.logger.setLevel(self.log_level)
309
+ # Clear any handlers that might be set for this logger. This is
310
+ # important for the case when we are an instance of
311
+ # EventExtractorThread, because then all handlers from the main
312
+ # thread are inherited (as opposed to no handlers in the case
313
+ # of EventExtractorProcess).
314
+ self.logger.handlers.clear()
301
315
  queue_handler = QueueHandler(self.log_queue)
316
+ queue_handler.setLevel(self.log_level)
302
317
  self.logger.addHandler(queue_handler)
303
- self.logger.addFilter(DeduplicatingLoggingFilter())
304
- self.logger.debug(f"Running {self} in PID {os.getpid()}")
318
+ self.logger.info("Ready")
305
319
 
306
320
  mp_array = np.ctypeslib.as_array(
307
321
  self.label_array).reshape(self.data.image.chunk_shape)
322
+
323
+ # only close queues when we have created them ourselves.
324
+ close_queues = isinstance(self, EventExtractorProcess)
325
+
308
326
  while True:
309
327
  try:
310
328
  chunk_index, label_index = self.raw_queue.get(timeout=.03)
@@ -332,15 +350,21 @@ class QueueEventExtractor:
332
350
  self.event_queue.put((index, events))
333
351
 
334
352
  self.logger.debug(f"Finalizing `run` for PID {os.getpid()}, {self}")
335
- if self.close_queues:
353
+ if close_queues:
336
354
  # Explicitly close the event queue and join it
337
355
  self.event_queue.close()
338
356
  self.event_queue.join_thread()
339
357
  self.logger.debug(f"End of `run` for PID {os.getpid()}, {self}")
358
+
359
+ # Make sure everything gets written to the queue.
360
+ queue_handler.flush()
361
+
362
+ if close_queues:
340
363
  # Also close the logging queue. Note that not all messages might
341
364
  # arrive in the logging queue, since we called `cancel_join_thread`
342
365
  # earlier.
343
366
  self.log_queue.close()
367
+ self.log_queue.join_thread()
344
368
 
345
369
  @classmethod
346
370
  def get_ppid_from_kwargs(cls, kwargs):
@@ -362,17 +386,3 @@ class EventExtractorThread(QueueEventExtractor, threading.Thread):
362
386
  def __init__(self, *args, **kwargs):
363
387
  super(EventExtractorThread, self).__init__(
364
388
  name="EventExtractorThread", *args, **kwargs)
365
-
366
-
367
- class DeduplicatingLoggingFilter(logging.Filter):
368
- def __init__(self, *args, **kwargs):
369
- super(DeduplicatingLoggingFilter, self).__init__(*args, **kwargs)
370
- self._records = []
371
-
372
- def filter(self, record):
373
- """Return True if the record should be logged"""
374
- msg = record.getMessage()
375
- logged = msg in self._records
376
- if not logged:
377
- self._records.append(msg)
378
- return not logged