dcnum 0.23.3.tar.gz → 0.24.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dcnum has been flagged as potentially problematic.

Files changed (118)
  1. {dcnum-0.23.3 → dcnum-0.24.0}/.github/workflows/check.yml +2 -2
  2. {dcnum-0.23.3 → dcnum-0.24.0}/CHANGELOG +14 -0
  3. {dcnum-0.23.3 → dcnum-0.24.0}/PKG-INFO +1 -1
  4. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/_version.py +2 -2
  5. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_background/base.py +24 -9
  6. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_background/bg_sparse_median.py +54 -28
  7. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/logic/ctrl.py +83 -38
  8. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/meta/ppid.py +1 -1
  9. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/read/hdf5_data.py +138 -72
  10. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/read/mapped.py +15 -2
  11. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segm_torch/segm_torch_mpo.py +4 -1
  12. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/write/__init__.py +1 -1
  13. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/write/writer.py +122 -21
  14. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum.egg-info/PKG-INFO +1 -1
  15. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_background_base.py +28 -10
  16. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_background_bg_roll_median.py +31 -0
  17. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_background_bg_sparsemed.py +179 -7
  18. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_logic_pipeline.py +101 -12
  19. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_read_basin.py +24 -17
  20. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_read_hdf5_basins.py +16 -14
  21. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_read_hdf5_index_mapping.py +10 -4
  22. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_write_writer.py +93 -9
  23. {dcnum-0.23.3 → dcnum-0.24.0}/.github/workflows/deploy_pypi.yml +0 -0
  24. {dcnum-0.23.3 → dcnum-0.24.0}/.gitignore +0 -0
  25. {dcnum-0.23.3 → dcnum-0.24.0}/.readthedocs.yml +0 -0
  26. {dcnum-0.23.3 → dcnum-0.24.0}/LICENSE +0 -0
  27. {dcnum-0.23.3 → dcnum-0.24.0}/README.rst +0 -0
  28. {dcnum-0.23.3 → dcnum-0.24.0}/docs/conf.py +0 -0
  29. {dcnum-0.23.3 → dcnum-0.24.0}/docs/extensions/github_changelog.py +0 -0
  30. {dcnum-0.23.3 → dcnum-0.24.0}/docs/index.rst +0 -0
  31. {dcnum-0.23.3 → dcnum-0.24.0}/docs/requirements.txt +0 -0
  32. {dcnum-0.23.3 → dcnum-0.24.0}/pyproject.toml +0 -0
  33. {dcnum-0.23.3 → dcnum-0.24.0}/setup.cfg +0 -0
  34. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/__init__.py +0 -0
  35. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/__init__.py +0 -0
  36. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/event_extractor_manager_thread.py +0 -0
  37. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_background/__init__.py +0 -0
  38. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_background/bg_copy.py +0 -0
  39. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_background/bg_roll_median.py +0 -0
  40. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_brightness/__init__.py +0 -0
  41. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_brightness/bright_all.py +0 -0
  42. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_brightness/common.py +0 -0
  43. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_contour/__init__.py +0 -0
  44. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_contour/contour.py +0 -0
  45. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_contour/moments.py +0 -0
  46. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_contour/volume.py +0 -0
  47. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_texture/__init__.py +0 -0
  48. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_texture/common.py +0 -0
  49. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/feat_texture/tex_all.py +0 -0
  50. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/gate.py +0 -0
  51. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/feat/queue_event_extractor.py +0 -0
  52. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/logic/__init__.py +0 -0
  53. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/logic/job.py +0 -0
  54. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/logic/json_encoder.py +0 -0
  55. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/meta/__init__.py +0 -0
  56. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/meta/paths.py +0 -0
  57. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/read/__init__.py +0 -0
  58. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/read/cache.py +0 -0
  59. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/read/const.py +0 -0
  60. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/__init__.py +0 -0
  61. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segm_thresh.py +0 -0
  62. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segm_torch/__init__.py +0 -0
  63. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segm_torch/segm_torch_base.py +0 -0
  64. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segm_torch/segm_torch_sto.py +0 -0
  65. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segm_torch/torch_model.py +0 -0
  66. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segm_torch/torch_postproc.py +0 -0
  67. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segm_torch/torch_preproc.py +0 -0
  68. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segmenter.py +0 -0
  69. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segmenter_manager_thread.py +0 -0
  70. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segmenter_mpo.py +0 -0
  71. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/segm/segmenter_sto.py +0 -0
  72. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/write/deque_writer_thread.py +0 -0
  73. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum/write/queue_collector_thread.py +0 -0
  74. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum.egg-info/SOURCES.txt +0 -0
  75. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum.egg-info/dependency_links.txt +0 -0
  76. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum.egg-info/requires.txt +0 -0
  77. {dcnum-0.23.3 → dcnum-0.24.0}/src/dcnum.egg-info/top_level.txt +0 -0
  78. {dcnum-0.23.3 → dcnum-0.24.0}/tests/conftest.py +0 -0
  79. {dcnum-0.23.3 → dcnum-0.24.0}/tests/data/fmt-hdf5_cytoshot_extended-moments-features.zip +0 -0
  80. {dcnum-0.23.3 → dcnum-0.24.0}/tests/data/fmt-hdf5_cytoshot_full-features_2023.zip +0 -0
  81. {dcnum-0.23.3 → dcnum-0.24.0}/tests/data/fmt-hdf5_cytoshot_full-features_2024.zip +0 -0
  82. {dcnum-0.23.3 → dcnum-0.24.0}/tests/data/fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip +0 -0
  83. {dcnum-0.23.3 → dcnum-0.24.0}/tests/data/fmt-hdf5_shapein_empty.zip +0 -0
  84. {dcnum-0.23.3 → dcnum-0.24.0}/tests/data/fmt-hdf5_shapein_raw-with-variable-length-logs.zip +0 -0
  85. {dcnum-0.23.3 → dcnum-0.24.0}/tests/data/segm-torch-model_unet-dcnum-test_g1_910c2.zip +0 -0
  86. {dcnum-0.23.3 → dcnum-0.24.0}/tests/data/segm-torch-test-data_unet-dcnum-test_g1_910c2.zip +0 -0
  87. {dcnum-0.23.3 → dcnum-0.24.0}/tests/helper_methods.py +0 -0
  88. {dcnum-0.23.3 → dcnum-0.24.0}/tests/requirements.txt +0 -0
  89. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_background_bg_copy.py +0 -0
  90. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_brightness.py +0 -0
  91. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_event_extractor_manager.py +0 -0
  92. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_gate.py +0 -0
  93. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_haralick.py +0 -0
  94. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_moments_based.py +0 -0
  95. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_moments_based_extended.py +0 -0
  96. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_feat_volume.py +0 -0
  97. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_init.py +0 -0
  98. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_logic_job.py +0 -0
  99. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_logic_join.py +0 -0
  100. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_logic_json.py +0 -0
  101. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_meta_paths.py +0 -0
  102. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_meta_ppid_base.py +0 -0
  103. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_meta_ppid_bg.py +0 -0
  104. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_meta_ppid_data.py +0 -0
  105. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_meta_ppid_feat.py +0 -0
  106. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_meta_ppid_gate.py +0 -0
  107. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_meta_ppid_segm.py +0 -0
  108. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_read_concat_hdf5.py +0 -0
  109. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_read_hdf5.py +0 -0
  110. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_segm_base.py +0 -0
  111. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_segm_mpo.py +0 -0
  112. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_segm_no_mask_proc.py +0 -0
  113. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_segm_sto.py +0 -0
  114. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_segm_thresh.py +0 -0
  115. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_segm_torch.py +0 -0
  116. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_segm_torch_preproc.py +0 -0
  117. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_write_deque_writer_thread.py +0 -0
  118. {dcnum-0.23.3 → dcnum-0.24.0}/tests/test_write_queue_collector_thread.py +0 -0
@@ -29,8 +29,8 @@ jobs:
  python -m pip install coverage flake8 pytest
  - name: Install dcnum
  run: |
- # https://github.com/luispedro/mahotas/issues/144
- pip install mahotas==1.4.13
+ # mahotas 1.4.15 does not yet support numpy 2.0
+ pip install "numpy<2"
  pip install .[torch]
  - name: List installed packages
  run: |
@@ -1,3 +1,17 @@
+ 0.24.0
+ - feat: add support for internal basins
+ - feat: "image_bg" as internal basin for "sparsemed" background computer
+ - fix: "sparsemed" background computer attributed background images with
+   an offset of `split_time` (the first event obtained the background image
+   of the first event of the first second and so on)
+ - enh: support numpy indexing for mapped basins
+ - enh: add new `write.copy_basins` method
+ - ref: return `h5py.Group` in `HDF5Data.get_basin_data` instead of
+   a basin `HDF5Data` instance
+ - ref: perform "plumbing" before "cleanup" in pipeline
+ - ref: increment DCNUM_PPID_GENERATION to 11
+ 0.23.4
+ - enh: run set_num_interop_threads(1) for torchmpo segmenter
  0.23.3
  - fix: ignore non-file-type-like basins
  - fix: workaround for slow reading from HDF5 (don't use index arrays)
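
To illustrate the "internal basins" and "numpy indexing for mapped basins" entries above, here is a minimal numpy sketch of the underlying idea (array shapes and values are hypothetical): only the unique background images are stored, together with a per-event index array, and the per-event "image_bg" feature is resolved by fancy indexing:

    import numpy as np

    rng = np.random.default_rng(42)
    # Only the unique background images are stored...
    bg_images = rng.integers(0, 255, size=(3, 80, 250), dtype=np.uint8)
    # ...plus one index per event ("mapping" of the internal basin).
    bg_idx = np.array([0, 0, 1, 1, 1, 2])

    # Resolving the basin is plain numpy fancy indexing; slices work too.
    image_bg_events = bg_images[bg_idx]       # shape (6, 80, 250)
    image_bg_subset = bg_images[bg_idx[2:5]]  # numpy indexing for mapped basins
    assert image_bg_events.shape == (6, 80, 250)
    assert image_bg_subset.shape == (3, 80, 250)
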
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: dcnum
- Version: 0.23.3
+ Version: 0.24.0
  Summary: numerics toolbox for imaging deformability cytometry
  Author: Maximilian Schlögel, Paul Müller, Raghava Alajangi
  Maintainer-email: Paul Müller <dev@craban.de>
@@ -12,5 +12,5 @@ __version__: str
  __version_tuple__: VERSION_TUPLE
  version_tuple: VERSION_TUPLE
 
- __version__ = version = '0.23.3'
- __version_tuple__ = version_tuple = (0, 23, 3)
+ __version__ = version = '0.24.0'
+ __version_tuple__ = version_tuple = (0, 24, 0)
@@ -1,8 +1,10 @@
  import abc
  import functools
  import inspect
+ import logging
  import multiprocessing as mp
  import pathlib
+ import time
 
  import h5py
 
@@ -41,8 +43,11 @@ class Background(abc.ABC):
  kwargs:
  Additional keyword arguments passed to the subclass.
  """
+ self.logger = logging.getLogger(
+ f"dcnum.feat.feat_background.{self.__class__.__name__}")
  # proper conversion to Path objects
  output_path = pathlib.Path(output_path)
+ self.output_path = output_path
  if isinstance(input_data, str):
  input_data = pathlib.Path(input_data)
  # kwargs checks
@@ -188,20 +193,30 @@ class Background(abc.ABC):
  return self.image_proc.value
 
  def process(self):
+ """Perform the background computation
+
+ This irreversibly removes/overrides any "image_bg" and
+ "bg_off" features defined in the output file `self.h5out`.
+ """
+ t0 = time.perf_counter()
  # Delete any old background data
- for key in ["image_bg", "bg_off"]:
- if key in self.h5out["events"]:
- del self.h5out["events"][key]
+ for ds_key in ["image_bg", "bg_off"]:
+ for grp_key in ["events", "basin_events"]:
+ if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+ del self.h5out[grp_key][ds_key]
  # Perform the actual background computation
  self.process_approach()
  bg_ppid = self.get_ppid()
  # Store pipeline information in the image_bg/bg_off feature
- for key in ["image_bg", "bg_off"]:
- if key in self.h5out["events"]:
- self.h5out[f"events/{key}"].attrs["dcnum ppid background"] = \
- bg_ppid
- self.h5out[F"events/{key}"].attrs["dcnum ppid generation"] = \
- ppid.DCNUM_PPID_GENERATION
+ for ds_key in ["image_bg", "bg_off"]:
+ for grp_key in ["events", "basin_events"]:
+ if grp_key in self.h5out and ds_key in self.h5out[grp_key]:
+ self.h5out[f"{grp_key}/{ds_key}"].attrs[
+ "dcnum ppid background"] = bg_ppid
+ self.h5out[F"{grp_key}/{ds_key}"].attrs[
+ "dcnum ppid generation"] = ppid.DCNUM_PPID_GENERATION
+ self.logger.info(
+ f"Background computation time: {time.perf_counter()-t0:.1f}s")
 
  @abc.abstractmethod
  def process_approach(self):
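
The hunk above writes the pipeline-identifier attributes to both the "events" and "basin_events" groups. A hedged sketch of how they could be read back with h5py (the file name is hypothetical):

    import h5py

    # Inspect the ppid attributes that Background.process() attaches to
    # "image_bg"/"bg_off" in either group, skipping whatever is absent.
    with h5py.File("output.rtdc", "r") as h5:
        for grp_key in ["events", "basin_events"]:
            for ds_key in ["image_bg", "bg_off"]:
                if grp_key in h5 and ds_key in h5[grp_key]:
                    attrs = h5[f"{grp_key}/{ds_key}"].attrs
                    print(grp_key, ds_key,
                          attrs.get("dcnum ppid background"),
                          attrs.get("dcnum ppid generation"))
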
@@ -1,4 +1,3 @@
- import logging
  import queue
  import time
 
@@ -9,14 +8,13 @@ from ...read import HDF5Data
 
  from .base import mp_spawn, Background
 
- logger = logging.getLogger(__name__)
-
 
  class BackgroundSparseMed(Background):
  def __init__(self, input_data, output_path, kernel_size=200,
  split_time=1., thresh_cleansing=0, frac_cleansing=.8,
  offset_correction=True,
- compress=True, num_cpus=None):
+ compress=True,
+ num_cpus=None):
  """Sparse median background correction with cleansing
 
  In contrast to the rolling median background correction,
@@ -79,6 +77,11 @@ class BackgroundSparseMed(Background):
  num_cpus: int
  Number of CPUs to use for median computation. Defaults to
  `multiprocessing.cpu_count()`.
+
+ .. versionchanged:: 0.23.5
+
+ The background image data are stored as an internal
+ mapped basin to reduce the output file size.
  """
  super(BackgroundSparseMed, self).__init__(
  input_data=input_data,
@@ -93,7 +96,7 @@
  )
 
  if kernel_size > len(self.input_data):
- logger.warning(
+ self.logger.warning(
  f"The kernel size {kernel_size} is too large for input data"
  f"size {len(self.input_data)}. Setting it to input data size!")
  kernel_size = len(self.input_data)
@@ -126,13 +129,14 @@
  else:
  # compute time using frame rate (approximate)
  dur = self.image_count / fr * 1.5
- logger.info(f"Approximating duration: {dur/60:.1f}min")
+ self.logger.info(
+ f"Approximating duration: {dur/60:.1f}min")
  self.time = np.linspace(0, dur, self.image_count,
  endpoint=True)
  if self.time is None:
  # No HDF5 file or no information therein; Make an educated guess.
  dur = self.image_count / 3600 * 1.5
- logger.info(f"Guessing duration: {dur/60:.1f}min")
+ self.logger.info(f"Guessing duration: {dur/60:.1f}min")
  self.time = np.linspace(0, dur, self.image_count,
  endpoint=True)
 
@@ -301,18 +305,18 @@
  thresh = np.quantile(ref, self.frac_cleansing)
  used = ref <= thresh
  frac_remove = np.sum(~used) / used.size
- logger.warning(
+ self.logger.warning(
  f"{frac_remove_user:.1%} of the background images would "
  f"be removed with the current settings, so we enforce "
  f"`frac_cleansing`. To avoid this warning, try decreasing "
  f"`thresh_cleansing` or `frac_cleansing`. The new "
  f"threshold is {thresh_fact / thresh}.")
 
- logger.info(f"Cleansed {frac_remove:.2%}")
+ self.logger.info(f"Cleansed {frac_remove:.2%}")
  step_times = self.step_times[used]
  bg_images = self.bg_images[used]
  else:
- logger.info("Background series cleansing disabled")
+ self.logger.info("Background series cleansing disabled")
  step_times = self.step_times
  bg_images = self.bg_images
 
@@ -322,35 +326,55 @@
  idx1 = None
  for ii in range(len(step_times)):
  t1 = step_times[ii]
- idx1 = np.argmin(np.abs(self.time - t1 + self.split_time/2))
+ idx1 = np.argmin(np.abs(self.time - t1 - self.split_time/2))
  bg_idx[idx0:idx1] = ii
  idx0 = idx1
  if idx1 is not None:
  # Fill up remainder of index array with last entry
  bg_idx[idx1:] = ii
 
- self.image_proc.value = 1
-
- # Write background data
- pos = 0
- step = 1000
- while pos < self.image_count:
- stop = min(pos + step, self.image_count)
- cur_slice = slice(pos, stop)
- cur_bg_data = bg_images[bg_idx[cur_slice]]
- self.writer.store_feature_chunk("image_bg", cur_bg_data)
- if self.offset_correction:
+ # Store the background images as an internal mapped basin
+ self.writer.store_basin(
+ name="background images",
+ description=f"Pipeline identifier: {self.get_ppid()}",
+ mapping=bg_idx,
+ internal_data={"image_bg": bg_images}
+ )
+
+ # store the offset correction, if applicable
+ if self.offset_correction:
+ self.logger.info("Computing offset correction")
+ # compute the mean at the top of all background images
+ sh, sw = self.input_data.shape[1:]
+ roi_full = (slice(None), slice(0, 20), slice(0, sw))
+ bg_data_mean = np.mean(bg_images[roi_full], axis=(1, 2))
+ pos = 0
+ step = self.writer.get_best_nd_chunks(item_shape=(sh, sw),
+ feat_dtype=np.uint8)[0]
+ bg_off = np.zeros(self.image_count, dtype=float)
+ # For every chunk in the input image data, compute that
+ # value as well and store the resulting offset value.
+ # TODO: Could this be parallelized, or are we limited in reading?
+ while pos < self.image_count:
+ stop = min(pos + step, self.image_count)
  # Record background offset correction "bg_off". We take a
  # slice of 20px from the top of the image (there are normally
  # no events here, only the channel walls are visible).
- sh, sw = self.input_data.shape[1:]
- roi_full = (slice(None), slice(0, 20), slice(0, sw))
+ cur_slice = slice(pos, stop)
+ # mean background brightness
+ val_bg = bg_data_mean[bg_idx[cur_slice]]
+ # mean image brightness
  roi_cur = (cur_slice, slice(0, 20), slice(0, sw))
- val_bg = np.mean(cur_bg_data[roi_full], axis=(1, 2))
  val_dat = np.mean(self.input_data[roi_cur], axis=(1, 2))
  # background image = image_bg + bg_off
- self.writer.store_feature_chunk("bg_off", val_dat - val_bg)
- pos += step
+ bg_off[cur_slice] = val_dat - val_bg
+ # set progress
+ self.image_proc.value = 0.5 * (1 + pos / self.image_count)
+ pos = stop
+ # finally, store the background offset feature
+ self.writer.store_feature_chunk("bg_off", bg_off)
+
+ self.image_proc.value = 1
 
  def process_second(self,
  ii: int,
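
The sign flip in the `np.argmin` line above is the fix for the `split_time` offset noted in the changelog. A hedged numpy sketch (event times and window centers are hypothetical) contrasting the old and new index assignment:

    import numpy as np

    split_time = 1.0
    time = np.arange(0, 3, 0.5)             # event times
    step_times = np.array([0.5, 1.5, 2.5])  # centers of 1 s windows

    def assign(sign):
        # sign=+1 reproduces the old code, sign=-1 the fixed code
        bg_idx = np.zeros(len(time), dtype=int)
        idx0 = 0
        for ii, t1 in enumerate(step_times):
            idx1 = np.argmin(np.abs(time - t1 + sign * split_time / 2))
            bg_idx[idx0:idx1] = ii
            idx0 = idx1
        bg_idx[idx1:] = ii  # fill remainder with last entry
        return bg_idx

    print(assign(+1))  # [1 1 2 2 2 2] -> backgrounds shifted by one window
    print(assign(-1))  # [0 0 1 1 2 2] -> each event gets its own window
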
@@ -393,7 +417,9 @@
 
  self.bg_images[ii] = self.shared_output.reshape(self.image_shape)
 
- self.image_proc.value = idx_stop / self.image_count
+ self.image_proc.value = idx_stop / (
+ # with offset correction, everything is slower
+ self.image_count * (1 + self.offset_correction))
 
 
  class WorkerSparseMed(mp_spawn.Process):
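
The new denominator relies on a Python bool acting as 0 or 1 in arithmetic, so with offset correction enabled `process_second` only drives the progress to 50 % and the offset-correction loop covers the rest:

    # True counts as 1 in arithmetic, so the denominator doubles:
    image_count = 100
    offset_correction = True
    print(image_count * (1 + offset_correction))  # 200
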
@@ -403,6 +403,12 @@ class DCNumJobRunner(threading.Thread):
  features=orig_feats,
  mapping=None)
 
+ # Handle basin data according to the user's request
+ self.state = "plumbing"
+ self.task_enforce_basin_strategy()
+
+ self.state = "cleanup"
+
  with HDF5Writer(self.path_temp_out) as hw:
  # pipeline metadata
  hw.h5.attrs["pipeline:dcnum generation"] = self.ppdict["gen_id"]
@@ -462,11 +468,7 @@
 
  # copy metadata/logs/tables from original file
  with h5py.File(self.job["path_in"]) as h5_src:
- copy_metadata(h5_src=h5_src,
- h5_dst=hw.h5,
- # Don't copy basins, we would have to index-map
- # them first.
- copy_basins=False)
+ copy_metadata(h5_src=h5_src, h5_dst=hw.h5)
  if redo_seg:
  # Store the correct measurement identifier. This is used to
  # identify this file as a correct basin in subsequent pipeline
@@ -490,12 +492,6 @@
  mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
  hw.h5.attrs["experiment:run identifier"] = mid_new
 
- # Handle basin data according to the user's request
- self.state = "plumbing"
- self.task_enforce_basin_strategy()
-
- self.state = "cleanup"
-
  trun = datetime.timedelta(seconds=round(time.monotonic() - time_start))
  self.logger.info(f"Run duration: {str(trun)}")
  self.logger.info(time.strftime("Run stop: %Y-%m-%d-%H.%M.%S",
@@ -547,22 +543,17 @@
  """
  self._progress_bn = 0
  t0 = time.perf_counter()
- # We need to make sure that the features are correctly attributed
- # from the input files. E.g. if the input file already has
- # background images, but we recompute the background images, then
- # we have to use the data from the recomputed background file.
- # We achieve this by keeping a specific order and only copying those
- # features that we don't already have in the output file.
- feats_raw = [
- # 1. background data from the temporary input image
- # (this must come before draw [sic!])
- [self.dtin.h5, ["image_bg", "bg_off"], "critical"],
- # 2. frame-based scalar features from the raw input file
- # (e.g. "temp" or "frame")
- [self.draw.h5, self.draw.features_scalar_frame, "optional"],
- # 3. image features from the input file
- [self.draw.h5, ["image", "image_bg", "bg_off"], "optional"],
- ]
+ # We have these points to consider:
+ # - We must use the `basinmap` feature to map from the original
+ # file to the output file.
+ # - We must copy "bg_off" and "image_bg" to the output file.
+ # - For the "drain" basin strategy, we also have to copy all the
+ # other features.
+ # - If "image_bg" is defined as an internal basin in the input
+ # file, we have to convert the mapping and store a corresponding
+ # internal basin in the output file.
+
+ # Determine the basinmap feature
  with HDF5Writer(self.path_temp_out) as hw:
  hout = hw.h5
  # First, we have to determine the basin mapping from input to
@@ -584,14 +575,15 @@
  # to the original input HDF5 file.
  raw_im = self.draw.index_mapping
  if raw_im is None:
- self.logger.info("Input file mapped with basinmap0")
  # Create a hard link to save time and space
  hout["events/basinmap0"] = hout["events/index_unmapped"]
- basinmap = idx_um
+ basinmap0 = idx_um
  else:
- basinmap = get_mapping_indices(raw_im)[idx_um]
+ self.logger.info("Converting input mapping")
+ basinmap0 = get_mapping_indices(raw_im)[idx_um]
  # Store the mapped basin data in the output file.
- hw.store_feature_chunk("basinmap0", basinmap)
+ hw.store_feature_chunk("basinmap0", basinmap0)
+ self.logger.info("Input mapped to output with basinmap0")
  # We don't need them anymore.
  del hout["events/index_unmapped"]
 
@@ -599,19 +591,72 @@
  # is the size of the raw dataset and the latter is its mapped
  # size!
  size_raw = self.draw.h5.attrs["experiment:event count"]
- if (len(basinmap) == size_raw
- and np.all(basinmap == np.arange(size_raw))):
+ if (len(basinmap0) == size_raw
+ and np.all(basinmap0 == np.arange(size_raw))):
  # This means that the images in the input overlap perfectly
  # with the images in the output, i.e. a "copy" segmenter
  # was used or something is very reproducible.
  # We set basinmap to None to be more efficient.
- basinmap = None
+ basinmap0 = None
 
  else:
  # The input is identical to the output, because we are using
  # the same pipeline identifier.
- basinmap = None
-
+ basinmap0 = None
+
+ # List of features we have to copy from input to output.
+ # We need to make sure that the features are correctly attributed
+ # from the input files. E.g. if the input file already has
+ # background images, but we recompute the background images, then
+ # we have to use the data from the recomputed background file.
+ # We achieve this by keeping a specific order and only copying
+ # those features that we don't already have in the output file.
+ feats_raw = [
+ # background data from the temporary input image
+ [self.dtin.h5, ["bg_off"], "critical"],
+ [self.draw.h5, self.draw.features_scalar_frame, "optional"],
+ [self.draw.h5, ["image", "bg_off"], "optional"],
+ ]
+
+ # Store image_bg as an internal basin, if defined in input
+ for idx in range(len(self.dtin.basins)):
+ bn_dict = self.dtin.basins[idx]
+ if (bn_dict["type"] == "internal"
+ and "image_bg" in bn_dict["features"]):
+ self.logger.info(
+ "Copying internal basin background images")
+ bn_grp, bn_feats, bn_map = self.dtin.get_basin_data(idx)
+ assert "image_bg" in bn_feats
+ # Load all images into memory (should only be ~600)
+ bg_images1 = self.dtin.h5["basin_events"]["image_bg"][:]
+ # Get the original internal mapping for these images
+ # Note that `basinmap0` always refers to indices in the
+ # original raw input file, and not to indices in an
+ # optional mapped input file (using `index_mapping`).
+ # Therefore, we do `self.dtin.h5["events"]["basinmap0"]`
+ # instead of `self.dtin["basinmap0"]`
+ basinmap_in = self.dtin.h5["events"][bn_dict["mapping"]][:]
+ # Now we have to convert the indices in `basinmap_in`
+ # to indices in the output file.
+ basinmap1 = basinmap_in[basinmap0]
+ # Store the internal mapping in the output file
+ hw.store_basin(name=bn_dict["name"],
+ description=bn_dict["description"],
+ mapping=basinmap1,
+ internal_data={"image_bg": bg_images1}
+ )
+ break
+ else:
+ self.logger.info("Background images must be copied")
+ # There is no internal image_bg feature, probably because
+ # the user did not use the sparsemed background correction.
+ # In this case, we simply add "image_bg" to the `feats_raw`.
+ feats_raw += [
+ [self.dtin.h5, ["image_bg"], "critical"],
+ [self.draw.h5, ["image_bg"], "optional"],
+ ]
+
+ # Copy the features required in the output file.
  for hin, feats, importance in feats_raw:
  # Only consider features that are available in the input
  # and that are not already in the output.
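
The central step in the hunk above is `basinmap1 = basinmap_in[basinmap0]`. A small worked example of this index composition (all values hypothetical):

    import numpy as np

    basinmap_in = np.array([0, 0, 0, 1, 1, 2, 2, 2])  # raw input event -> bg image
    basinmap0 = np.array([1, 2, 5, 7])                # output event -> raw input event

    # Composing the two mappings with fancy indexing yields
    # output event -> background image directly:
    basinmap1 = basinmap_in[basinmap0]
    print(basinmap1)  # [0 0 2 2]
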
@@ -626,7 +671,7 @@
  copy_features(h5_src=hin,
  h5_dst=hout,
  features=feats,
- mapping=basinmap)
+ mapping=basinmap0)
  else:
  # TAP: Create basins for the "optional" features in the
  # output file. Note that the "critical" features never
@@ -638,7 +683,7 @@
  paths = [pin, os.path.relpath(pin, pout)]
  hw.store_basin(name="dcnum basin",
  features=feats,
- mapping=basinmap0,
+ mapping=basinmap0,
  paths=paths,
  description=f"Created with dcnum {version}",
  )
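
The `paths` pair above stores the absolute basin path plus its location relative to the output, so the basin stays resolvable if the directory tree is moved. A hedged sketch with hypothetical POSIX paths:

    import os

    pin = "/data/measurements/run1_input.rtdc"  # basin file (input)
    pout = "/data/results"                      # location of the output file
    print([pin, os.path.relpath(pin, pout)])
    # ['/data/measurements/run1_input.rtdc', '../measurements/run1_input.rtdc']
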
@@ -10,7 +10,7 @@ import warnings
 
  #: Increment this string if there are breaking changes that make
  #: previous pipelines unreproducible.
- DCNUM_PPID_GENERATION = "10"
+ DCNUM_PPID_GENERATION = "11"
 
 
  class ClassWithPPIDCapabilities(Protocol):