dcnum 0.19.0__tar.gz → 0.20.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dcnum might be problematic. Click here for more details.
- {dcnum-0.19.0 → dcnum-0.20.0}/CHANGELOG +10 -0
- {dcnum-0.19.0/src/dcnum.egg-info → dcnum-0.20.0}/PKG-INFO +1 -1
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/_version.py +2 -2
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_background/base.py +3 -3
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_background/bg_copy.py +10 -8
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_background/bg_roll_median.py +2 -2
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_background/bg_sparse_median.py +2 -2
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/logic/ctrl.py +154 -35
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/logic/job.py +55 -2
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/read/__init__.py +1 -1
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/read/cache.py +24 -2
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/read/hdf5_data.py +38 -11
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/read/mapped.py +11 -3
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/write/__init__.py +2 -1
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/write/deque_writer_thread.py +9 -1
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/write/queue_collector_thread.py +8 -14
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/write/writer.py +91 -0
- {dcnum-0.19.0 → dcnum-0.20.0/src/dcnum.egg-info}/PKG-INFO +1 -1
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum.egg-info/SOURCES.txt +1 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_background_bg_copy.py +27 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_logic_pipeline.py +79 -28
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_meta_ppid_data.py +3 -1
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_read_hdf5.py +19 -0
- dcnum-0.20.0/tests/test_read_hdf5_basins.py +280 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_write_queue_collector_thread.py +4 -4
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_write_writer.py +73 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/.github/workflows/check.yml +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/.github/workflows/deploy_pypi.yml +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/.gitignore +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/.readthedocs.yml +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/LICENSE +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/README.rst +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/docs/conf.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/docs/extensions/github_changelog.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/docs/index.rst +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/docs/requirements.txt +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/pyproject.toml +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/setup.cfg +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/__init__.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/__init__.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/event_extractor_manager_thread.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_background/__init__.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_brightness/__init__.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_brightness/bright_all.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_brightness/common.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_contour/__init__.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_contour/contour.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_contour/moments.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_contour/volume.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_texture/__init__.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_texture/common.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/feat_texture/tex_all.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/gate.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/feat/queue_event_extractor.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/logic/__init__.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/logic/json_encoder.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/meta/__init__.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/meta/paths.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/meta/ppid.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/read/const.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/segm/__init__.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/segm/segm_thresh.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/segm/segmenter.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/segm/segmenter_cpu.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/segm/segmenter_gpu.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum/segm/segmenter_manager_thread.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum.egg-info/dependency_links.txt +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum.egg-info/requires.txt +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/src/dcnum.egg-info/top_level.txt +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/conftest.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/data/fmt-hdf5_cytoshot_extended-moments-features.zip +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/data/fmt-hdf5_cytoshot_full-features_2023.zip +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/data/fmt-hdf5_cytoshot_full-features_2024.zip +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/data/fmt-hdf5_cytoshot_full-features_legacy_allev_2023.zip +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/data/fmt-hdf5_shapein_empty.zip +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/data/fmt-hdf5_shapein_raw-with-variable-length-logs.zip +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/helper_methods.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/requirements.txt +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_background_base.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_background_bg_roll_median.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_background_bg_sparsemed.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_brightness.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_event_extractor_manager.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_gate.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_haralick.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_moments_based.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_moments_based_extended.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_feat_volume.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_init.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_logic_job.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_logic_join.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_logic_json.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_meta_paths.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_meta_ppid_base.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_meta_ppid_bg.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_meta_ppid_feat.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_meta_ppid_gate.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_meta_ppid_segm.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_read_basin.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_read_concat_hdf5.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_read_hdf5_index_mapping.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_segm_base.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_segm_no_mask_proc.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_segm_thresh.py +0 -0
- {dcnum-0.19.0 → dcnum-0.20.0}/tests/test_write_deque_writer_thread.py +0 -0
|
@@ -1,3 +1,13 @@
|
|
|
1
|
+
0.20.0
|
|
2
|
+
- feat: support reading mapped basins
|
|
3
|
+
- feat: support writing mapped-basin-based output files
|
|
4
|
+
- fix: copy "bg_off" data to output file when copying background data
|
|
5
|
+
- enh: allow to slice BaseImageChunkCache
|
|
6
|
+
- enh: sort logs, tables and basins for reproducible access
|
|
7
|
+
- enh: add more timing information in logs
|
|
8
|
+
- ref: background progress value is now a double between 0 and 1
|
|
9
|
+
0.19.1
|
|
10
|
+
- enh: support steps when specifying data slices in `index_mapping`
|
|
1
11
|
0.19.0
|
|
2
12
|
- enh: elevate `HDF5Data`s `index_mapping` to pipeline identifier status
|
|
3
13
|
(this changes the pipeline identifier)
|
|
@@ -62,8 +62,8 @@ class Background(abc.ABC):
|
|
|
62
62
|
|
|
63
63
|
#: number of images in the input data
|
|
64
64
|
self.image_count = None
|
|
65
|
-
#:
|
|
66
|
-
self.image_proc = mp_spawn.Value("
|
|
65
|
+
#: fraction of images that have been processed
|
|
66
|
+
self.image_proc = mp_spawn.Value("d", 0)
|
|
67
67
|
|
|
68
68
|
#: HDF5Data instance for input data
|
|
69
69
|
self.hdin = None
|
|
@@ -185,7 +185,7 @@ class Background(abc.ABC):
|
|
|
185
185
|
if self.image_count == 0:
|
|
186
186
|
return 0.
|
|
187
187
|
else:
|
|
188
|
-
return self.image_proc.value
|
|
188
|
+
return self.image_proc.value
|
|
189
189
|
|
|
190
190
|
def process(self):
|
|
191
191
|
# Delete any old background data
|
|
@@ -9,17 +9,19 @@ class BackgroundCopy(Background):
|
|
|
9
9
|
pass
|
|
10
10
|
|
|
11
11
|
def process(self):
|
|
12
|
-
"""
|
|
12
|
+
"""Copy input data to output dataset"""
|
|
13
13
|
if self.h5in != self.h5out:
|
|
14
|
-
hin = self.hdin.
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
14
|
+
hin = self.hdin.h5
|
|
15
|
+
for feat in ["image_bg", "bg_off"]:
|
|
16
|
+
if feat in hin["events"]:
|
|
17
|
+
h5py.h5o.copy(src_loc=hin["events"].id,
|
|
18
|
+
src_name=feat.encode("utf-8"),
|
|
19
|
+
dst_loc=self.h5out["events"].id,
|
|
20
|
+
dst_name=feat.encode("utf-8"),
|
|
21
|
+
)
|
|
20
22
|
|
|
21
23
|
# set progress to 100%
|
|
22
|
-
self.image_proc.value =
|
|
24
|
+
self.image_proc.value = 1
|
|
23
25
|
|
|
24
26
|
def process_approach(self):
|
|
25
27
|
# We do the copying in `process`, because we do not want to modify
|
|
@@ -184,7 +184,7 @@ class BackgroundRollMed(Background):
|
|
|
184
184
|
num_remaining,
|
|
185
185
|
axis=0)
|
|
186
186
|
self.writer.store_feature_chunk("image_bg", last_chunk)
|
|
187
|
-
|
|
187
|
+
self.image_proc.value = 1
|
|
188
188
|
|
|
189
189
|
def process_next_batch(self):
|
|
190
190
|
"""Process one batch of input data"""
|
|
@@ -223,7 +223,7 @@ class BackgroundRollMed(Background):
|
|
|
223
223
|
)
|
|
224
224
|
|
|
225
225
|
self.current_batch += 1
|
|
226
|
-
self.image_proc.value += self.batch_size
|
|
226
|
+
self.image_proc.value += self.batch_size / self.image_count
|
|
227
227
|
|
|
228
228
|
|
|
229
229
|
class WorkerRollMed(mp_spawn.Process):
|
|
@@ -329,7 +329,7 @@ class BackgroundSparseMed(Background):
|
|
|
329
329
|
# Fill up remainder of index array with last entry
|
|
330
330
|
bg_idx[idx1:] = ii
|
|
331
331
|
|
|
332
|
-
self.image_proc.value =
|
|
332
|
+
self.image_proc.value = 1
|
|
333
333
|
|
|
334
334
|
# Write background data
|
|
335
335
|
pos = 0
|
|
@@ -393,7 +393,7 @@ class BackgroundSparseMed(Background):
|
|
|
393
393
|
|
|
394
394
|
self.bg_images[ii] = self.shared_output.reshape(self.image_shape)
|
|
395
395
|
|
|
396
|
-
self.image_proc.value = idx_stop
|
|
396
|
+
self.image_proc.value = idx_stop / self.image_count
|
|
397
397
|
|
|
398
398
|
|
|
399
399
|
class WorkerSparseMed(mp_spawn.Process):
|
|
@@ -14,6 +14,7 @@ import traceback
|
|
|
14
14
|
import uuid
|
|
15
15
|
|
|
16
16
|
import h5py
|
|
17
|
+
import numpy as np
|
|
17
18
|
|
|
18
19
|
from ..feat.feat_background.base import get_available_background_methods
|
|
19
20
|
from ..feat.queue_event_extractor import QueueEventExtractor
|
|
@@ -21,10 +22,10 @@ from ..feat import gate
|
|
|
21
22
|
from ..feat import EventExtractorManagerThread
|
|
22
23
|
from ..segm import SegmenterManagerThread, get_available_segmenters
|
|
23
24
|
from ..meta import ppid
|
|
24
|
-
from ..read import HDF5Data
|
|
25
|
-
from .._version import version_tuple
|
|
25
|
+
from ..read import HDF5Data, get_mapping_indices
|
|
26
|
+
from .._version import version, version_tuple
|
|
26
27
|
from ..write import (
|
|
27
|
-
DequeWriterThread, HDF5Writer, QueueCollectorThread,
|
|
28
|
+
DequeWriterThread, HDF5Writer, QueueCollectorThread, copy_features,
|
|
28
29
|
copy_metadata, create_with_basins, set_default_filter_kwargs
|
|
29
30
|
)
|
|
30
31
|
|
|
@@ -43,6 +44,7 @@ valid_states = [
|
|
|
43
44
|
"setup",
|
|
44
45
|
"background",
|
|
45
46
|
"segmentation",
|
|
47
|
+
"plumbing",
|
|
46
48
|
"cleanup",
|
|
47
49
|
"done",
|
|
48
50
|
"error",
|
|
@@ -79,8 +81,9 @@ class DCNumJobRunner(threading.Thread):
|
|
|
79
81
|
# current job state
|
|
80
82
|
self._state = "init"
|
|
81
83
|
# overall progress [0, 1]
|
|
82
|
-
self._progress_bg = None
|
|
83
|
-
self._progress_ex = None
|
|
84
|
+
self._progress_bg = None # background
|
|
85
|
+
self._progress_ex = None # segmentation
|
|
86
|
+
self._progress_bn = None # creating basins
|
|
84
87
|
# segmentation frame rate
|
|
85
88
|
self._segm_rate = 0
|
|
86
89
|
|
|
@@ -237,8 +240,12 @@ class DCNumJobRunner(threading.Thread):
|
|
|
237
240
|
# how much fractional time each processing step takes.
|
|
238
241
|
bgw = 4 # fraction of background
|
|
239
242
|
exw = 27 # fraction of segmentation and feature extraction
|
|
243
|
+
if self.job["basin_strategy"] == "drain":
|
|
244
|
+
drw = 15 # because data need to be copied
|
|
245
|
+
else:
|
|
246
|
+
drw = 1 # just creating the basins in output file
|
|
240
247
|
clw = 1 # fraction of cleanup operations
|
|
241
|
-
tot = bgw + exw + clw
|
|
248
|
+
tot = bgw + exw + drw + clw
|
|
242
249
|
progress = 0
|
|
243
250
|
st = self.state
|
|
244
251
|
|
|
@@ -247,15 +254,22 @@ class DCNumJobRunner(threading.Thread):
|
|
|
247
254
|
# background already computed
|
|
248
255
|
progress += bgw / tot
|
|
249
256
|
elif self._progress_bg is not None:
|
|
250
|
-
# This is the image count of the input dataset
|
|
251
|
-
progress +=
|
|
257
|
+
# The background progress is a fraction between 0 and 1.
|
|
258
|
+
progress += self._progress_bg.value * bgw / tot
|
|
252
259
|
|
|
253
260
|
# segmentation
|
|
254
261
|
if valid_states.index(st) > valid_states.index("segmentation"):
|
|
255
262
|
# segmentation already done
|
|
256
263
|
progress += exw / tot
|
|
257
264
|
elif self._progress_ex is not None:
|
|
258
|
-
progress += exw / tot
|
|
265
|
+
progress += self._progress_ex * exw / tot
|
|
266
|
+
|
|
267
|
+
# draining basins
|
|
268
|
+
if valid_states.index(st) > valid_states.index("plumbing"):
|
|
269
|
+
# plumbing already done
|
|
270
|
+
progress += drw / tot
|
|
271
|
+
if self._progress_bn is not None:
|
|
272
|
+
progress += self._progress_bn * drw / tot
|
|
259
273
|
|
|
260
274
|
if self.state == "done":
|
|
261
275
|
progress = 1
|
|
@@ -371,16 +385,20 @@ class DCNumJobRunner(threading.Thread):
|
|
|
371
385
|
# Note any new actions that work on `self.path_temp_in` are not
|
|
372
386
|
# reflected in `self.path_temp_out`.
|
|
373
387
|
self.path_temp_in.rename(self.path_temp_out)
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
388
|
+
# Since no segmentation was done, the output file now does not
|
|
389
|
+
# contain any events. This is not really what we wanted, but we
|
|
390
|
+
# can still store all features in the output file if required.
|
|
391
|
+
if self.job["basin_strategy"] == "drain":
|
|
392
|
+
orig_feats = []
|
|
393
|
+
for feat in self.draw.h5["events"].keys():
|
|
394
|
+
if isinstance(self.draw.h5["events"][feat], h5py.Dataset):
|
|
395
|
+
# copy_features does not support Groups
|
|
396
|
+
orig_feats.append(feat)
|
|
397
|
+
with h5py.File(self.path_temp_out, "a") as h5_dst:
|
|
398
|
+
copy_features(h5_src=self.draw.h5,
|
|
399
|
+
h5_dst=h5_dst,
|
|
400
|
+
features=orig_feats,
|
|
401
|
+
mapping=None)
|
|
384
402
|
|
|
385
403
|
with HDF5Writer(self.path_temp_out) as hw:
|
|
386
404
|
# pipeline metadata
|
|
@@ -433,7 +451,8 @@ class DCNumJobRunner(threading.Thread):
|
|
|
433
451
|
with h5py.File(self.job["path_in"]) as h5_src:
|
|
434
452
|
copy_metadata(h5_src=h5_src,
|
|
435
453
|
h5_dst=hw.h5,
|
|
436
|
-
#
|
|
454
|
+
# Don't copy basins, we would have to index-map
|
|
455
|
+
# them first.
|
|
437
456
|
copy_basins=False)
|
|
438
457
|
if redo_seg:
|
|
439
458
|
# Store the correct measurement identifier. This is used to
|
|
@@ -450,6 +469,12 @@ class DCNumJobRunner(threading.Thread):
|
|
|
450
469
|
mid_new = f"{mid_cur}_{mid_ap}" if mid_cur else mid_ap
|
|
451
470
|
hw.h5.attrs["experiment:run identifier"] = mid_new
|
|
452
471
|
|
|
472
|
+
# Handle basin data according to the user's request
|
|
473
|
+
self.state = "plumbing"
|
|
474
|
+
self.task_enforce_basin_strategy()
|
|
475
|
+
|
|
476
|
+
self.state = "cleanup"
|
|
477
|
+
|
|
453
478
|
trun = datetime.timedelta(seconds=round(time.monotonic() - time_start))
|
|
454
479
|
self.logger.info(f"Run duration: {str(trun)}")
|
|
455
480
|
self.logger.info(time.strftime("Run stop: %Y-%m-%d-%H.%M.%S",
|
|
@@ -491,6 +516,115 @@ class DCNumJobRunner(threading.Thread):
|
|
|
491
516
|
bic.process()
|
|
492
517
|
self.logger.info("Finished background computation")
|
|
493
518
|
|
|
519
|
+
def task_enforce_basin_strategy(self):
|
|
520
|
+
"""Transfer basin data from input files to output if requested
|
|
521
|
+
|
|
522
|
+
The user specified the "basin_strategy" keyword argument in
|
|
523
|
+
`self.job`. If this is set to "drain", then copy all basin
|
|
524
|
+
information from the input file to the output file. If it
|
|
525
|
+
is set to "tap", then only create basins in the output file.
|
|
526
|
+
"""
|
|
527
|
+
self._progress_bn = 0
|
|
528
|
+
t0 = time.perf_counter()
|
|
529
|
+
# We need to make sure that the features are correctly attributed
|
|
530
|
+
# from the input files. E.g. if the input file already has
|
|
531
|
+
# background images, but we recompute the background images, then
|
|
532
|
+
# we have to use the data from the recomputed background file.
|
|
533
|
+
# We achieve this by keeping a specific order and only copying those
|
|
534
|
+
# features that we don't already have in the output file.
|
|
535
|
+
feats_raw = [
|
|
536
|
+
# 1. background data from the temporary input image
|
|
537
|
+
# (this must come before draw [sic!])
|
|
538
|
+
[self.dtin.h5, ["image_bg", "bg_off"], "critical"],
|
|
539
|
+
# 2. frame-based scalar features from the raw input file
|
|
540
|
+
# (e.g. "temp" or "frame")
|
|
541
|
+
[self.draw.h5, self.draw.features_scalar_frame, "optional"],
|
|
542
|
+
# 3. image features from the input file
|
|
543
|
+
[self.draw.h5, ["image", "image_bg", "bg_off"], "optional"],
|
|
544
|
+
]
|
|
545
|
+
with h5py.File(self.path_temp_out, "a") as hout:
|
|
546
|
+
hw = HDF5Writer(hout)
|
|
547
|
+
# First, we have to determine the basin mapping from input to
|
|
548
|
+
# output. This information is stored by the QueueCollectorThread
|
|
549
|
+
# in the "basinmap0" feature, ready to be used by us.
|
|
550
|
+
if "index_unmapped" in hout["events"]:
|
|
551
|
+
# The unmapped indices enumerate the events in the output file
|
|
552
|
+
# with indices from the mapped input file. E.g. if for the
|
|
553
|
+
# first image in the input file, two events are found and for
|
|
554
|
+
# the second image in the input file, three events are found,
|
|
555
|
+
# then this would contain [0, 0, 1, 1, 1, ...]. If the index
|
|
556
|
+
# mapping of the input file was set to slice(1, 100), then the
|
|
557
|
+
# first image would not be there, and we would have
|
|
558
|
+
# [1, 1, 1, ...].
|
|
559
|
+
idx_um = hout["events/index_unmapped"]
|
|
560
|
+
|
|
561
|
+
# If we want to convert this to an actual basinmap feature,
|
|
562
|
+
# then we have to convert those indices to indices that map
|
|
563
|
+
# to the original input HDF5 file.
|
|
564
|
+
raw_im = self.draw.index_mapping
|
|
565
|
+
if raw_im is None:
|
|
566
|
+
self.logger.info("Input file mapped with basinmap0")
|
|
567
|
+
# Create a hard link to save time and space
|
|
568
|
+
hout["events/basinmap0"] = hout["events/index_unmapped"]
|
|
569
|
+
basinmap = idx_um
|
|
570
|
+
else:
|
|
571
|
+
basinmap = get_mapping_indices(raw_im)[idx_um]
|
|
572
|
+
# Store the mapped basin data in the output file.
|
|
573
|
+
hw.store_feature_chunk("basinmap0", basinmap)
|
|
574
|
+
# We don't need them anymore.
|
|
575
|
+
del hout["events/index_unmapped"]
|
|
576
|
+
|
|
577
|
+
# Note that `size_raw != (len(self.draw))` [sic!]. The former
|
|
578
|
+
# is the size of the raw dataset and the latter is its mapped
|
|
579
|
+
# size!
|
|
580
|
+
size_raw = self.draw.h5.attrs["experiment:event count"]
|
|
581
|
+
if (len(basinmap) == size_raw
|
|
582
|
+
and np.all(basinmap == np.arange(size_raw))):
|
|
583
|
+
# This means that the images in the input overlap perfectly
|
|
584
|
+
# with the images in the output, i.e. a "copy" segmenter
|
|
585
|
+
# was used or something is very reproducible.
|
|
586
|
+
# We set basinmap to None to be more efficient.
|
|
587
|
+
basinmap = None
|
|
588
|
+
|
|
589
|
+
else:
|
|
590
|
+
# The input is identical to the output, because we are using
|
|
591
|
+
# the same pipeline identifier.
|
|
592
|
+
basinmap = None
|
|
593
|
+
|
|
594
|
+
for hin, feats, importance in feats_raw:
|
|
595
|
+
# Only consider features that are available in the input
|
|
596
|
+
# and that are not already in the output.
|
|
597
|
+
feats = [f for f in feats
|
|
598
|
+
if (f in hin["events"] and f not in hout["events"])]
|
|
599
|
+
if not feats:
|
|
600
|
+
continue
|
|
601
|
+
elif (self.job["basin_strategy"] == "drain"
|
|
602
|
+
or importance == "critical"):
|
|
603
|
+
# DRAIN: Copy all features over to the output file.
|
|
604
|
+
self.logger.debug(f"Transferring {feats} to output file")
|
|
605
|
+
copy_features(h5_src=hin,
|
|
606
|
+
h5_dst=hout,
|
|
607
|
+
features=feats,
|
|
608
|
+
mapping=basinmap)
|
|
609
|
+
else:
|
|
610
|
+
# TAP: Create basins for the "optional" features in the
|
|
611
|
+
# output file. Note that the "critical" features never
|
|
612
|
+
# reach this case.
|
|
613
|
+
self.logger.debug(f"Creating basin for {feats}")
|
|
614
|
+
# Relative and absolute paths.
|
|
615
|
+
pin = pathlib.Path(hin.filename).resolve()
|
|
616
|
+
pout = pathlib.Path(hout.filename).resolve()
|
|
617
|
+
paths = [pin, os.path.relpath(pin, pout)]
|
|
618
|
+
hw.store_basin(name="dcnum basin",
|
|
619
|
+
features=feats,
|
|
620
|
+
mapping=basinmap,
|
|
621
|
+
paths=paths,
|
|
622
|
+
description=f"Created with dcnum {version}",
|
|
623
|
+
)
|
|
624
|
+
self._progress_bn += 1 / len(feats_raw)
|
|
625
|
+
t_tot = time.perf_counter() - t0
|
|
626
|
+
self.logger.info(f"Enforcing basin strategy time: {t_tot:.1f}s")
|
|
627
|
+
|
|
494
628
|
def task_segment_extract(self):
|
|
495
629
|
self.logger.info("Starting segmentation and feature extraction")
|
|
496
630
|
# Start writer thread
|
|
@@ -629,21 +763,6 @@ class DCNumJobRunner(threading.Thread):
|
|
|
629
763
|
|
|
630
764
|
self.logger.info("Finished segmentation and feature extraction")
|
|
631
765
|
|
|
632
|
-
def task_transfer_basin_data(self):
|
|
633
|
-
with h5py.File(self.path_temp_out, "a") as hout:
|
|
634
|
-
hd = HDF5Data(hout)
|
|
635
|
-
for ii, _ in enumerate(hd.basins):
|
|
636
|
-
hindat, features = hd.get_basin_data(ii)
|
|
637
|
-
for feat in features:
|
|
638
|
-
if feat not in hout["events"]:
|
|
639
|
-
self.logger.debug(
|
|
640
|
-
f"Transferring {feat} to output file")
|
|
641
|
-
h5py.h5o.copy(src_loc=hindat.h5["events"].id,
|
|
642
|
-
src_name=feat.encode(),
|
|
643
|
-
dst_loc=hout["events"].id,
|
|
644
|
-
dst_name=feat.encode(),
|
|
645
|
-
)
|
|
646
|
-
|
|
647
766
|
|
|
648
767
|
def join_thread_helper(thr, timeout, retries, logger, name):
|
|
649
768
|
for _ in range(retries):
|
|
@@ -3,7 +3,8 @@ import copy
|
|
|
3
3
|
import inspect
|
|
4
4
|
import multiprocessing as mp
|
|
5
5
|
import pathlib
|
|
6
|
-
from typing import Dict
|
|
6
|
+
from typing import Dict, Literal
|
|
7
|
+
import warnings
|
|
7
8
|
|
|
8
9
|
from ..feat import QueueEventExtractor
|
|
9
10
|
from ..feat.feat_background.base import get_available_background_methods
|
|
@@ -27,10 +28,62 @@ class DCNumPipelineJob:
|
|
|
27
28
|
feature_kwargs: Dict = None,
|
|
28
29
|
gate_code: str = "norm",
|
|
29
30
|
gate_kwargs: Dict = None,
|
|
30
|
-
|
|
31
|
+
basin_strategy: Literal["drain", "tap"] = "drain",
|
|
32
|
+
no_basins_in_output: bool = None,
|
|
31
33
|
num_procs: int = None,
|
|
32
34
|
debug: bool = False,
|
|
33
35
|
):
|
|
36
|
+
"""Pipeline job recipe
|
|
37
|
+
|
|
38
|
+
Parameters
|
|
39
|
+
----------
|
|
40
|
+
path_in: pathlib.Path | str
|
|
41
|
+
input data path
|
|
42
|
+
path_out: pathlib.Path | str
|
|
43
|
+
output data path
|
|
44
|
+
data_code: str
|
|
45
|
+
code of input data reader to use
|
|
46
|
+
data_kwargs: dict
|
|
47
|
+
keyword arguments for data reader
|
|
48
|
+
background_code: str
|
|
49
|
+
code of background data computer to use
|
|
50
|
+
background_kwargs: dict
|
|
51
|
+
keyword arguments for background data computer
|
|
52
|
+
segmenter_code: str
|
|
53
|
+
code of segmenter to use
|
|
54
|
+
segmenter_kwargs: dict
|
|
55
|
+
keyword arguments for segmenter
|
|
56
|
+
feature_code: str
|
|
57
|
+
code of feature extractor
|
|
58
|
+
feature_kwargs: dict
|
|
59
|
+
keyword arguments for feature extractor
|
|
60
|
+
gate_code: str
|
|
61
|
+
code for gating/event filtering class
|
|
62
|
+
gate_kwargs: dict
|
|
63
|
+
keyword arguments for gating/event filtering class
|
|
64
|
+
basin_strategy: str
|
|
65
|
+
strategy on how to handle event data; In principle, not all
|
|
66
|
+
events have to be stored in the output file if basins are
|
|
67
|
+
defined, linking back to the original file.
|
|
68
|
+
- You can "drain" all basins which means that the output file
|
|
69
|
+
will contain all features, but will also be very big.
|
|
70
|
+
- You can "tap" the basins, including the input file, which means
|
|
71
|
+
that the output file will be comparatively small.
|
|
72
|
+
no_basins_in_output: bool
|
|
73
|
+
Deprecated
|
|
74
|
+
num_procs: int
|
|
75
|
+
Number of processes to use
|
|
76
|
+
debug: bool
|
|
77
|
+
Whether to be verbose and use threads instead of processes
|
|
78
|
+
"""
|
|
79
|
+
if no_basins_in_output is not None:
|
|
80
|
+
warnings.warn("The `no_basins_in_output` keyword argument is "
|
|
81
|
+
"deprecated. Please use `basin_strategy` instead.")
|
|
82
|
+
if no_basins_in_output:
|
|
83
|
+
basin_strategy = "drain"
|
|
84
|
+
else:
|
|
85
|
+
basin_strategy = "tap"
|
|
86
|
+
|
|
34
87
|
#: initialize keyword arguments for this job
|
|
35
88
|
self.kwargs = {}
|
|
36
89
|
spec = inspect.getfullargspec(DCNumPipelineJob.__init__)
|
|
@@ -22,6 +22,7 @@ class BaseImageChunkCache(abc.ABC):
|
|
|
22
22
|
cache_size: int = 2,
|
|
23
23
|
):
|
|
24
24
|
self.shape = shape
|
|
25
|
+
self._dtype = None
|
|
25
26
|
chunk_size = min(shape[0], chunk_size)
|
|
26
27
|
self._len = self.shape[0]
|
|
27
28
|
#: This is a FILO cache for the chunks
|
|
@@ -33,12 +34,32 @@ class BaseImageChunkCache(abc.ABC):
|
|
|
33
34
|
self.num_chunks = int(np.ceil(self._len / (self.chunk_size or 1)))
|
|
34
35
|
|
|
35
36
|
def __getitem__(self, index):
|
|
36
|
-
|
|
37
|
-
|
|
37
|
+
if isinstance(index, (slice, list, np.ndarray)):
|
|
38
|
+
if isinstance(index, slice):
|
|
39
|
+
indices = np.arange(index.start or 0,
|
|
40
|
+
index.stop or len(self),
|
|
41
|
+
index.step)
|
|
42
|
+
else:
|
|
43
|
+
indices = index
|
|
44
|
+
array_out = np.empty((len(indices),) + self.image_shape,
|
|
45
|
+
dtype=self.dtype)
|
|
46
|
+
for ii, idx in enumerate(indices):
|
|
47
|
+
array_out[ii] = self[idx]
|
|
48
|
+
return array_out
|
|
49
|
+
else:
|
|
50
|
+
chunk_index, sub_index = self._get_chunk_index_for_index(index)
|
|
51
|
+
return self.get_chunk(chunk_index)[sub_index]
|
|
38
52
|
|
|
39
53
|
def __len__(self):
|
|
40
54
|
return self._len
|
|
41
55
|
|
|
56
|
+
@property
|
|
57
|
+
def dtype(self):
|
|
58
|
+
"""data type of the image data"""
|
|
59
|
+
if self._dtype is None:
|
|
60
|
+
self._dtype = self[0].dtype
|
|
61
|
+
return self._dtype
|
|
62
|
+
|
|
42
63
|
@abc.abstractmethod
|
|
43
64
|
def _get_chunk_data(self, chunk_slice):
|
|
44
65
|
"""Implemented in subclass to obtain actual data"""
|
|
@@ -50,6 +71,7 @@ class BaseImageChunkCache(abc.ABC):
|
|
|
50
71
|
raise IndexError(
|
|
51
72
|
f"Index {index} out of bounds for HDF5ImageCache "
|
|
52
73
|
f"of size {self._len}")
|
|
74
|
+
index = int(index) # convert np.uint64 to int, so we get ints below
|
|
53
75
|
chunk_index = index // self.chunk_size
|
|
54
76
|
sub_index = index % self.chunk_size
|
|
55
77
|
return chunk_index, sub_index
|
|
@@ -186,25 +186,27 @@ class HDF5Data:
|
|
|
186
186
|
if isinstance(self.meta[key], bytes):
|
|
187
187
|
self.meta[key] = self.meta[key].decode("utf-8")
|
|
188
188
|
# logs
|
|
189
|
-
for key in h5.get("logs",
|
|
189
|
+
for key in sorted(h5.get("logs", {}).keys()):
|
|
190
190
|
alog = list(h5["logs"][key])
|
|
191
191
|
if alog:
|
|
192
192
|
if isinstance(alog[0], bytes):
|
|
193
193
|
alog = [ll.decode("utf") for ll in alog]
|
|
194
194
|
self.logs[key] = alog
|
|
195
195
|
# tables
|
|
196
|
-
for tab in h5.get("tables",
|
|
196
|
+
for tab in sorted(h5.get("tables", {}).keys()):
|
|
197
197
|
tabdict = {}
|
|
198
198
|
for tkey in h5["tables"][tab].dtype.fields.keys():
|
|
199
199
|
tabdict[tkey] = \
|
|
200
200
|
np.array(h5["tables"][tab][tkey]).reshape(-1)
|
|
201
201
|
self.tables[tab] = tabdict
|
|
202
202
|
# basins
|
|
203
|
-
|
|
203
|
+
basins = []
|
|
204
|
+
for bnkey in h5.get("basins", {}).keys():
|
|
204
205
|
bn_data = "\n".join(
|
|
205
206
|
[s.decode() for s in h5["basins"][bnkey][:].tolist()])
|
|
206
207
|
bn_dict = json.loads(bn_data)
|
|
207
|
-
|
|
208
|
+
basins.append(bn_dict)
|
|
209
|
+
self.basins = sorted(basins, key=lambda x: x["name"])
|
|
208
210
|
|
|
209
211
|
if state["pixel_size"] is not None:
|
|
210
212
|
self.pixel_size = state["pixel_size"]
|
|
@@ -355,8 +357,12 @@ class HDF5Data:
|
|
|
355
357
|
elif val == "0":
|
|
356
358
|
kwargs["index_mapping"] = None
|
|
357
359
|
elif val.count("-"):
|
|
358
|
-
start, stop =
|
|
359
|
-
kwargs["index_mapping"] = slice(
|
|
360
|
+
start, stop, step = val.split("-")
|
|
361
|
+
kwargs["index_mapping"] = slice(
|
|
362
|
+
None if start == "n" else int(start),
|
|
363
|
+
None if stop == "n" else int(stop),
|
|
364
|
+
None if step == "n" else int(step)
|
|
365
|
+
)
|
|
360
366
|
else:
|
|
361
367
|
kwargs["index_mapping"] = int(val)
|
|
362
368
|
else:
|
|
@@ -391,7 +397,22 @@ class HDF5Data:
|
|
|
391
397
|
if path is None:
|
|
392
398
|
self._basin_data[index] = (None, None)
|
|
393
399
|
else:
|
|
394
|
-
|
|
400
|
+
feat_basinmap = bn_dict.get("mapping", None)
|
|
401
|
+
if feat_basinmap is None:
|
|
402
|
+
# This is NOT a mapped basin.
|
|
403
|
+
index_mapping = self.index_mapping
|
|
404
|
+
else:
|
|
405
|
+
# This is a mapped basin. Create an indexing list.
|
|
406
|
+
if self.index_mapping is None:
|
|
407
|
+
# The current dataset is not mapped.
|
|
408
|
+
basinmap_idx = slice(None)
|
|
409
|
+
else:
|
|
410
|
+
# The current dataset is also mapped.
|
|
411
|
+
basinmap_idx = get_mapping_indices(self.index_mapping)
|
|
412
|
+
basinmap = self.h5[f"events/{feat_basinmap}"]
|
|
413
|
+
index_mapping = basinmap[basinmap_idx]
|
|
414
|
+
|
|
415
|
+
h5dat = HDF5Data(path, index_mapping=index_mapping)
|
|
395
416
|
features = bn_dict.get("features")
|
|
396
417
|
if features is None:
|
|
397
418
|
# Only get the features from the actual HDF5 file.
|
|
@@ -416,21 +437,27 @@ class HDF5Data:
|
|
|
416
437
|
if feat not in self._image_cache:
|
|
417
438
|
if f"events/{feat}" in self.h5:
|
|
418
439
|
ds = self.h5[f"events/{feat}"]
|
|
440
|
+
idx_map = self.index_mapping
|
|
419
441
|
else:
|
|
442
|
+
idx_map = None
|
|
420
443
|
# search all basins
|
|
421
444
|
for idx in range(len(self.basins)):
|
|
422
|
-
|
|
445
|
+
bn_dat, features = self.get_basin_data(idx)
|
|
423
446
|
if features is not None:
|
|
424
447
|
if feat in features:
|
|
425
|
-
|
|
448
|
+
# HDF5 dataset
|
|
449
|
+
ds = bn_dat.h5[f"events/{feat}"]
|
|
450
|
+
# Index mapping (taken from the basins which
|
|
451
|
+
# already includes the mapping from the current
|
|
452
|
+
# instance).
|
|
453
|
+
idx_map = bn_dat.index_mapping
|
|
426
454
|
break
|
|
427
455
|
else:
|
|
428
456
|
ds = None
|
|
429
457
|
|
|
430
458
|
if ds is not None:
|
|
431
459
|
image = HDF5ImageCache(
|
|
432
|
-
h5ds=get_mapped_object(obj=ds,
|
|
433
|
-
index_mapping=self.index_mapping),
|
|
460
|
+
h5ds=get_mapped_object(obj=ds, index_mapping=idx_map),
|
|
434
461
|
cache_size=self.image_cache_size,
|
|
435
462
|
boolean=feat == "mask")
|
|
436
463
|
else:
|
|
@@ -34,6 +34,16 @@ class MappedHDF5Dataset:
|
|
|
34
34
|
def get_mapping_indices(
|
|
35
35
|
index_mapping: numbers.Integral | slice | list | np.ndarray
|
|
36
36
|
):
|
|
37
|
+
"""Return integer numpy array with mapping indices for a range
|
|
38
|
+
|
|
39
|
+
Parameters
|
|
40
|
+
----------
|
|
41
|
+
index_mapping: numbers.Integral | slice | list | np.ndarray
|
|
42
|
+
Several options you have here:
|
|
43
|
+
- integer: results in np.arange(integer)
|
|
44
|
+
- slice: results in np.arange(slice.start, slice.stop, slice.step)
|
|
45
|
+
- list or np.ndarray: returns the input as uint32 array
|
|
46
|
+
"""
|
|
37
47
|
if isinstance(index_mapping, numbers.Integral):
|
|
38
48
|
return _get_mapping_indices_cached(index_mapping)
|
|
39
49
|
elif isinstance(index_mapping, slice):
|
|
@@ -54,12 +64,10 @@ def _get_mapping_indices_cached(
|
|
|
54
64
|
return np.arange(index_mapping)
|
|
55
65
|
elif isinstance(index_mapping, tuple):
|
|
56
66
|
im_slice = slice(*index_mapping)
|
|
57
|
-
if im_slice.step is not None:
|
|
58
|
-
raise NotImplementedError("Slices with step not implemented yet")
|
|
59
67
|
if im_slice.stop is None or im_slice.start is None:
|
|
60
68
|
raise NotImplementedError(
|
|
61
69
|
"Slices must have start and stop defined")
|
|
62
|
-
return np.arange(im_slice.start, im_slice.stop)
|
|
70
|
+
return np.arange(im_slice.start, im_slice.stop, im_slice.step)
|
|
63
71
|
elif isinstance(index_mapping, list):
|
|
64
72
|
return np.array(index_mapping, dtype=np.uint32)
|
|
65
73
|
else:
|
|
@@ -2,4 +2,5 @@
|
|
|
2
2
|
from .deque_writer_thread import DequeWriterThread
|
|
3
3
|
from .queue_collector_thread import EventStash, QueueCollectorThread
|
|
4
4
|
from .writer import (
|
|
5
|
-
HDF5Writer, copy_metadata, create_with_basins,
|
|
5
|
+
HDF5Writer, copy_features, copy_metadata, create_with_basins,
|
|
6
|
+
set_default_filter_kwargs)
|