dcnum-0.17.1-py3-none-any.whl → dcnum-0.18.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcnum/_version.py +2 -2
- dcnum/feat/__init__.py +1 -1
- dcnum/feat/feat_background/base.py +18 -22
- dcnum/feat/feat_background/bg_copy.py +8 -4
- dcnum/feat/feat_background/bg_roll_median.py +16 -7
- dcnum/feat/feat_background/bg_sparse_median.py +53 -5
- dcnum/feat/feat_brightness/bright_all.py +41 -6
- dcnum/feat/feat_contour/__init__.py +4 -0
- dcnum/feat/{feat_moments/mt_legacy.py → feat_contour/moments.py} +32 -8
- dcnum/feat/feat_contour/volume.py +174 -0
- dcnum/feat/queue_event_extractor.py +25 -4
- dcnum/logic/ctrl.py +24 -2
- dcnum/logic/json_encoder.py +2 -0
- dcnum/meta/ppid.py +1 -1
- dcnum/read/__init__.py +1 -0
- dcnum/read/cache.py +78 -78
- dcnum/read/const.py +4 -1
- dcnum/read/hdf5_data.py +74 -16
- dcnum/read/mapped.py +79 -0
- dcnum/segm/segm_thresh.py +3 -3
- dcnum/segm/segmenter.py +73 -42
- dcnum/segm/segmenter_cpu.py +5 -5
- dcnum/segm/segmenter_manager_thread.py +11 -2
- dcnum/write/writer.py +37 -5
- {dcnum-0.17.1.dist-info → dcnum-0.18.0.dist-info}/METADATA +1 -1
- dcnum-0.18.0.dist-info/RECORD +48 -0
- {dcnum-0.17.1.dist-info → dcnum-0.18.0.dist-info}/WHEEL +1 -1
- dcnum/feat/feat_moments/__init__.py +0 -4
- dcnum-0.17.1.dist-info/RECORD +0 -46
- /dcnum/feat/{feat_moments/ct_opencv.py → feat_contour/contour.py} +0 -0
- {dcnum-0.17.1.dist-info → dcnum-0.18.0.dist-info}/LICENSE +0 -0
- {dcnum-0.17.1.dist-info → dcnum-0.18.0.dist-info}/top_level.txt +0 -0
dcnum/logic/ctrl.py
CHANGED
@@ -1,9 +1,11 @@
 import collections
 import datetime
+import hashlib
 import json
 import logging
 from logging.handlers import QueueListener
 import multiprocessing as mp
+import numbers
 import os
 import pathlib
 import platform
@@ -14,6 +16,7 @@ import traceback
 import uuid
 
 import h5py
+import numpy as np
 
 from ..feat.feat_background.base import get_available_background_methods
 from ..feat.queue_event_extractor import QueueEventExtractor
@@ -382,6 +385,24 @@ class DCNumJobRunner(threading.Thread):
         hw.h5.attrs["pipeline:dcnum gate"] = self.ppdict["gate_id"]
         hw.h5.attrs["pipeline:dcnum hash"] = self.pphash
         hw.h5.attrs["pipeline:dcnum yield"] = self.event_count
+        # index mapping information
+        im = self.job.kwargs["data_kwargs"].get("index_mapping", None)
+        if im is None:
+            dim = "0"
+        elif isinstance(im, numbers.Number):
+            dim = f"{im}"
+        elif isinstance(im, slice):
+            dim = (f"{im.start if im.start is not None else 'n'}"
+                   + f"-{im.stop if im.stop is not None else 'n'}"
+                   + f"-{im.step if im.step is not None else 'n'}"
+                   )
+        elif isinstance(im, (list, np.ndarray)):
+            idhash = hashlib.md5(
+                np.array(im, dtype=np.uint32).tobytes()).hexdigest()
+            dim = f"h-{idhash[:8]}"
+        else:
+            dim = "unknown"
+        hw.h5.attrs["pipeline:dcnum mapping"] = dim
         # regular metadata
         hw.h5.attrs["experiment:event count"] = self.event_count
         hw.h5.attrs["imaging:pixel size"] = self.draw.pixel_size
@@ -503,7 +524,7 @@ class DCNumJobRunner(threading.Thread):
             num_segmenters = 1
         elif seg_cls.hardware_processor == "cpu":  # CPU segmenter
             # We could in principle set the number of slots to one and
-            #
+            # have both number of extractors and number of segmenters set
             # to the total number of CPUs. However, we would need more RAM
             # (for caching the image data) and we also have more overhead.
             # Having two slots shared between all workers is more efficient.
@@ -522,10 +543,11 @@ class DCNumJobRunner(threading.Thread):
         slot_chunks = mp_spawn.Array("i", num_slots)
         slot_states = mp_spawn.Array("u", num_slots)
 
-        # Initialize thread
+        # Initialize segmenter manager thread
         thr_segm = SegmenterManagerThread(
            segmenter=seg_cls(**self.job["segmenter_kwargs"]),
            image_data=imdat,
+           bg_off=self.dtin["bg_off"] if "bg_off" in self.dtin else None,
            slot_states=slot_states,
            slot_chunks=slot_chunks,
            debug=self.job["debug"],
dcnum/logic/json_encoder.py
CHANGED
@@ -13,5 +13,7 @@ class ExtendedJSONEncoder(json.JSONEncoder):
             return int(obj)
         elif isinstance(obj, np.bool_):
             return bool(obj)
+        elif isinstance(obj, slice):
+            return "PYTHON-SLICE", (obj.start, obj.stop, obj.step)
         # Let the base class default method raise the TypeError
         return json.JSONEncoder.default(self, obj)
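Slices can now appear in job keyword arguments (see `index_mapping` below), so the encoder serializes them as a tagged tuple. A small sketch of the behavior, reproducing only the slice branch shown above:

import json


class SliceEncoder(json.JSONEncoder):
    # stand-in for dcnum's ExtendedJSONEncoder, slice branch only
    def default(self, obj):
        if isinstance(obj, slice):
            # JSON has no slice type; store a tagged (start, stop, step)
            return "PYTHON-SLICE", (obj.start, obj.stop, obj.step)
        return super().default(obj)


print(json.dumps({"index_mapping": slice(0, 100)}, cls=SliceEncoder))
# {"index_mapping": ["PYTHON-SLICE", [0, 100, null]]}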
dcnum/meta/ppid.py
CHANGED
dcnum/read/__init__.py
CHANGED
dcnum/read/cache.py
CHANGED
@@ -1,7 +1,9 @@
+import abc
 import collections
 import functools
 import hashlib
 import pathlib
+from typing import Tuple
 import warnings
 
 import h5py
@@ -13,41 +15,34 @@ class EmptyDatasetWarning(UserWarning):
     pass
 
 
-class HDF5ImageCache:
+class BaseImageChunkCache(abc.ABC):
     def __init__(self,
-                 h5ds: h5py.Dataset,
+                 shape: Tuple[int],
                  chunk_size: int = 1000,
                  cache_size: int = 2,
-                 boolean: bool = False):
-        """An HDF5 image cache
-
-        Deformability cytometry data files commonly contain image stacks
-        that are chunked in various ways. Loading just a single image
-        can be time-consuming, because an entire HDF5 chunk has to be
-        loaded, decompressed and from that one image extracted. The
-        `HDF5ImageCache` class caches the chunks from the HDF5 files
-        into memory, making single-image-access very fast.
-        """
-        self.shape = h5ds.shape
+                 ):
+        self.shape = shape
+        chunk_size = min(shape[0], chunk_size)
         self._len = self.shape[0]
-        if self._len == 0:
-            warnings.warn(f"Input image '{h5ds.name}' in "
-                          f"file {h5ds.file.filename} has zero length",
-                          EmptyDatasetWarning)
-        # TODO:
-        # - adjust chunking to multiples of the chunks in the dataset
-        #   (which might slightly speed up things)
-        chunk_size = min(h5ds.shape[0], chunk_size)
-        self.h5ds = h5ds
-        self.chunk_size = chunk_size
-        self.boolean = boolean
-        self.cache_size = cache_size
         #: This is a FILO cache for the chunks
         self.cache = collections.OrderedDict()
         self.image_shape = self.shape[1:]
         self.chunk_shape = (chunk_size,) + self.shape[1:]
+        self.chunk_size = chunk_size
+        self.cache_size = cache_size
         self.num_chunks = int(np.ceil(self._len / (self.chunk_size or 1)))
 
+    def __getitem__(self, index):
+        chunk_index, sub_index = self._get_chunk_index_for_index(index)
+        return self.get_chunk(chunk_index)[sub_index]
+
+    def __len__(self):
+        return self._len
+
+    @abc.abstractmethod
+    def _get_chunk_data(self, chunk_slice):
+        """Implemented in subclass to obtain actual data"""
+
     def _get_chunk_index_for_index(self, index):
         if index < 0:
             index = self._len + index
@@ -59,26 +54,14 @@ class HDF5ImageCache:
         sub_index = index % self.chunk_size
         return chunk_index, sub_index
 
-    def __getitem__(self, index):
-        chunk_index, sub_index = self._get_chunk_index_for_index(index)
-        return self.get_chunk(chunk_index)[sub_index]
-
-    def __len__(self):
-        return self._len
-
     def get_chunk(self, chunk_index):
         """Return one chunk of images"""
         if chunk_index not in self.cache:
-            fslice = slice(self.chunk_size * chunk_index,
-                           self.chunk_size * (chunk_index + 1)
-                           )
-            data = self.h5ds[fslice]
-            if self.boolean:
-                data = np.array(data, dtype=bool)
-            self.cache[chunk_index] = data
-            if len(self.cache) > self.cache_size:
+            if len(self.cache) >= self.cache_size:
                 # Remove the first item
                 self.cache.popitem(last=False)
+            data = self._get_chunk_data(self.get_chunk_slice(chunk_index))
+            self.cache[chunk_index] = data
         return self.cache[chunk_index]
 
     def get_chunk_size(self, chunk_index):
@@ -91,60 +74,77 @@ class HDF5ImageCache:
             raise IndexError(f"{self} only has {self.num_chunks} chunks!")
         return chunk_size
 
+    def get_chunk_slice(self, chunk_index):
+        """Return the slice corresponding to the chunk index"""
+        ch_slice = slice(self.chunk_size * chunk_index,
+                         self.chunk_size * (chunk_index + 1)
+                         )
+        return ch_slice
+
     def iter_chunks(self):
-        size = self.h5ds.shape[0]
         index = 0
         chunk = 0
         while True:
             yield chunk
             chunk += 1
             index += self.chunk_size
-            if index >= size:
+            if index >= self._len:
                 break
 
 
-class ImageCorrCache:
+class HDF5ImageCache(BaseImageChunkCache):
     def __init__(self,
-                 image: HDF5ImageCache,
-                 image_bg: HDF5ImageCache):
-        self.num_chunks = image.num_chunks
-        self.h5ds = image.h5ds
-        self.shape = image.shape
-        self.chunk_shape = image.chunk_shape
-        #: This is a FILO cache for the corrected image chunks
-        self.cache = collections.OrderedDict()
-        self.cache_size = image.cache_size
+                 h5ds: h5py.Dataset,
+                 chunk_size: int = 1000,
+                 cache_size: int = 2,
+                 boolean: bool = False):
+        """An HDF5 image cache
 
+        Deformability cytometry data files commonly contain image stacks
+        that are chunked in various ways. Loading just a single image
+        can be time-consuming, because an entire HDF5 chunk has to be
+        loaded, decompressed and from that one image extracted. The
+        `HDF5ImageCache` class caches the chunks from the HDF5 files
+        into memory, making single-image-access very fast.
+        """
+        super(HDF5ImageCache, self).__init__(
+            shape=h5ds.shape,
+            chunk_size=chunk_size,
+            cache_size=cache_size)
+        # TODO:
+        # - adjust chunking to multiples of the chunks in the dataset
+        #   (which might slightly speed up things)
+        self.h5ds = h5ds
+        self.boolean = boolean
 
-    def get_chunk(self, chunk_index):
-        if chunk_index not in self.cache:
-            data = np.array(
-                self.image.get_chunk(chunk_index), dtype=np.int16) \
-                - self.image_bg.get_chunk(chunk_index)
-            self.cache[chunk_index] = data
-            if len(self.cache) > self.cache_size:
-                # Remove the first item
-                self.cache.popitem(last=False)
-        return self.cache[chunk_index]
+        if self._len == 0:
+            warnings.warn(f"Input image '{h5ds.name}' in "
+                          f"file {h5ds.file.filename} has zero length",
+                          EmptyDatasetWarning)
 
+    def _get_chunk_data(self, chunk_slice):
+        data = self.h5ds[chunk_slice]
+        if self.boolean:
+            data = np.array(data, dtype=bool)
+        return data
+
+
+class ImageCorrCache(BaseImageChunkCache):
+    def __init__(self,
+                 image: HDF5ImageCache,
+                 image_bg: HDF5ImageCache):
+        super(ImageCorrCache, self).__init__(
+            shape=image.shape,
+            chunk_size=image.chunk_size,
+            cache_size=image.cache_size)
+        self.image = image
+        self.image_bg = image_bg
+
+    def _get_chunk_data(self, chunk_slice):
+        data = np.array(
+            self.image._get_chunk_data(chunk_slice), dtype=np.int16) \
+            - self.image_bg._get_chunk_data(chunk_slice)
+        return data
 
 
 @functools.cache
dcnum/read/const.py
CHANGED
@@ -1,6 +1,7 @@
 #: Scalar features that apply to all events in a frame and which are
-#: not computed
+#: not computed for individual events.
 PROTECTED_FEATURES = [
+    "bg_off",
     "flow_rate",
     "frame",
     "g_force",
@@ -10,5 +11,7 @@ PROTECTED_FEATURES = [
     "time"
 ]
 
+# User-defined features may be anything, but if the user needs something
+# very specific for the pipeline, having them protected is a nice feature.
 for ii in range(10):
     PROTECTED_FEATURES.append(f"userdef{ii}")
dcnum/read/hdf5_data.py
CHANGED
@@ -13,6 +13,7 @@ import numpy as np
 
 from .cache import HDF5ImageCache, ImageCorrCache, md5sum
 from .const import PROTECTED_FEATURES
+from .mapped import get_mapped_object, get_mapping_indices
 
 
 class HDF5Data:
@@ -26,12 +27,47 @@ class HDF5Data:
                  logs: Dict[List[str]] = None,
                  tables: Dict[np.ndarray] = None,
                  image_cache_size: int = 2,
+                 index_mapping: int | slice | List | np.ndarray = None,
                  ):
+        """
+
+        Parameters
+        ----------
+        path:
+            path to data file
+        pixel_size:
+            pixel size in µm
+        md5_5m:
+            MD5 sum of the first 5 MiB; computed if not provided
+        meta:
+            metadata dictionary; extracted from HDF5 attributes
+            if not provided
+        basins:
+            list of basin dictionaries; extracted from HDF5 attributes
+            if not provided
+        logs:
+            dictionary of logs; extracted from HDF5 attributes
+            if not provided
+        tables:
+            dictionary of tables; extracted from HDF5 attributes
+            if not provided
+        image_cache_size:
+            size of the image cache to use when accessing image data
+        index_mapping:
+            select only a subset of input events, transparently reducing
+            the size of the dataset; possible data types are
+            - int `N`: use the first `N` events
+            - slice: use the events defined by a slice
+            - list: list of integers specifying the event indices to use
+            Numpy indexing rules apply. E.g. to only process the first
+            100 events, set this to `100` or `slice(0, 100)`.
+        """
         # Init is in __setstate__ so we can pickle this class
         # and use it for multiprocessing.
         if isinstance(path, h5py.File):
             self.h5 = path
             path = path.filename
+
         self.__setstate__({"path": path,
                            "pixel_size": pixel_size,
                            "md5_5m": md5_5m,
@@ -40,6 +76,7 @@ class HDF5Data:
                            "logs": logs,
                            "tables": tables,
                            "image_cache_size": image_cache_size,
+                           "index_mapping": index_mapping,
                            })
 
     def __contains__(self, item):
@@ -53,7 +90,7 @@ class HDF5Data:
 
     def __getitem__(self, feat):
         if feat in ["image", "image_bg", "mask"]:
-            data = self.get_image_cache(feat)
+            data = self.get_image_cache(feat)  # already index-mapped
             if data is None:
                 raise KeyError(f"Feature '{feat}' not found in {self}!")
         else:
@@ -62,19 +99,25 @@ class HDF5Data:
             return self._cache_scalar[feat]
         elif (feat in self.h5["events"]
               and len(self.h5["events"][feat].shape) == 1):  # cache scalar
-            self._cache_scalar[feat] = self.h5["events"][feat][:]
+            if self.index_mapping is None:
+                idx_map = slice(None)  # no mapping indices, just slice
+            else:
+                idx_map = get_mapping_indices(self.index_mapping)
+            self._cache_scalar[feat] = self.h5["events"][feat][idx_map]
             return self._cache_scalar[feat]
         else:
             if feat in self.h5["events"]:
                 # Not cached (possibly slow)
                 warnings.warn(f"Feature {feat} not cached (possibly slow)")
-                return self.h5["events"][feat]
+                return get_mapped_object(
+                    obj=self.h5["events"][feat],
+                    index_mapping=self.index_mapping)
             else:
                 # Check the basins
                 for idx in range(len(self.basins)):
                     bn, bn_features = self.get_basin_data(idx)
                     if bn_features and feat in bn_features:
-                        return bn[feat]
+                        return bn[feat]  # already index-mapped
                 # If we got here, then the feature data does not exist.
                 raise KeyError(f"Feature '{feat}' not found in {self}!")
 
@@ -86,13 +129,14 @@ class HDF5Data:
             "logs": self.logs,
             "tables": self.tables,
             "basins": self.basins,
-            "image_cache_size": self.image.cache_size
+            "image_cache_size": self.image.cache_size,
+            "index_mapping": self.index_mapping,
         }
 
     def __setstate__(self, state):
         # Make sure these properties exist (we rely on __init__, because
         # we want this class to be pickable and __init__ is not called by
-        # `pickle.load
+        # `pickle.load`).
         # Cached properties
         self._feats = None
         self._keys = None
@@ -116,7 +160,7 @@ class HDF5Data:
         if self.md5_5m is None:
             if isinstance(self.path, pathlib.Path):
                 # 5MB md5sum of input file
-                self.md5_5m = md5sum(self.path, count=80)
+                self.md5_5m = md5sum(self.path, blocksize=65536, count=80)
             else:
                 self.md5_5m = str(uuid.uuid4()).replace("-", "")
         self.meta = state["meta"]
@@ -165,12 +209,17 @@ class HDF5Data:
 
         self.image_cache_size = state["image_cache_size"]
 
+        self.index_mapping = state["index_mapping"]
+
         if self.h5 is None:
             self.h5 = h5py.File(self.path, libver="latest")
 
     def __len__(self):
         if self._len is None:
-            self._len = self.h5.attrs["experiment:event count"]
+            if self.index_mapping is not None:
+                self._len = get_mapping_indices(self.index_mapping).size
+            else:
+                self._len = self.h5.attrs["experiment:event count"]
         return self._len
 
     @property
@@ -255,20 +304,26 @@ class HDF5Data:
         # Data does not really fit into the PPID scheme we use for the rest
         # of the pipeline. This implementation here is custom.
         code = cls.get_ppid_code()
+        ppid_ps = f"{kwargs['pixel_size']:.8f}".rstrip("0")
+        kwid = "^".join([f"p={ppid_ps}"])
         return ":".join([code, kwid])
 
     @staticmethod
     def get_ppkw_from_ppid(dat_ppid):
         # Data does not fit in the PPID scheme we use, but we still
         # would like to pass pixel_size to __init__ if we need it.
-        code,
+        code, kwargs_str = dat_ppid.split(":")
         if code != HDF5Data.get_ppid_code():
             raise ValueError(f"Could not find data method '{code}'!")
+        kwitems = kwargs_str.split("^")
+        kwargs = {}
+        for item in kwitems:
+            var, val = item.split("=")
+            if var == "p":
+                kwargs["pixel_size"] = float(val)
+            else:
+                raise ValueError(f"Invalid parameter '{var}'!")
+        return kwargs
 
     def get_basin_data(self, index):
         """Return HDF5Data info for a basin index in `self.basins`
@@ -298,7 +353,7 @@ class HDF5Data:
         if path is None:
             self._basin_data[index] = (None, None)
         else:
-            h5dat = HDF5Data(path)
+            h5dat = HDF5Data(path, index_mapping=self.index_mapping)
             features = bn_dict.get("features")
             if features is None:
                 # Only get the features from the actual HDF5 file.
@@ -336,7 +391,8 @@ class HDF5Data:
 
         if ds is not None:
             image = HDF5ImageCache(
-                h5ds=ds,
+                h5ds=get_mapped_object(obj=ds,
+                                       index_mapping=self.index_mapping),
                 cache_size=self.image_cache_size,
                 boolean=feat == "mask")
         else:
@@ -386,6 +442,7 @@ def concatenated_hdf5_data(paths: List[pathlib.Path],
     - If one of the input files does not contain a feature from the first
       input `paths`, then a `ValueError` is raised. Use the `features`
       argument to specify which features you need instead.
+    - Basins are not considered.
     """
     h5kwargs = {"mode": "w", "libver": "latest"}
     if isinstance(path_out, (pathlib.Path, str)):
@@ -432,6 +489,7 @@ def concatenated_hdf5_data(paths: List[pathlib.Path],
         if not isinstance(h5["events"][feat], h5py.Dataset):
             warnings.warn(
                 f"Ignoring {feat}; not implemented yet!")
+            continue
         if feat in ["frame", "time"]:
             continue
         shapes.setdefault(feat, []).append(
dcnum/read/mapped.py
ADDED
@@ -0,0 +1,79 @@
+import functools
+import numbers
+
+import h5py
+import numpy as np
+
+
+class MappedHDF5Dataset:
+    def __init__(self,
+                 h5ds: h5py.Dataset,
+                 mapping_indices: np.ndarray):
+        """An index-mapped object for accessing an HDF5 dataset
+
+        Parameters
+        ----------
+        h5ds: h5py.Dataset
+            HDF5 dataset from which to map data
+        mapping_indices: np.ndarray
+            numpy indexing array containing integer indices
+        """
+        self.h5ds = h5ds
+        self.mapping_indices = mapping_indices
+        self.shape = (mapping_indices.size,) + h5ds.shape[1:]
+
+    def __getitem__(self, idx):
+        if isinstance(idx, numbers.Integral):
+            return self.h5ds[self.mapping_indices[idx]]
+        else:
+            idx_mapped = self.mapping_indices[idx]
+            return self.h5ds[idx_mapped]
+
+
+def get_mapping_indices(
+        index_mapping: numbers.Integral | slice | list | np.ndarray
+        ):
+    if isinstance(index_mapping, numbers.Integral):
+        return _get_mapping_indices_cached(index_mapping)
+    elif isinstance(index_mapping, slice):
+        return _get_mapping_indices_cached(
+            (index_mapping.start, index_mapping.stop, index_mapping.step))
+    elif isinstance(index_mapping, (np.ndarray, list)):
+        return np.array(index_mapping, dtype=np.uint32)
+    else:
+        raise ValueError(f"Invalid type for `index_mapping`: "
+                         f"{type(index_mapping)} ({index_mapping})")
+
+
+@functools.lru_cache(maxsize=100)
+def _get_mapping_indices_cached(
+        index_mapping: numbers.Integral | tuple
+        ):
+    if isinstance(index_mapping, numbers.Integral):
+        return np.arange(index_mapping)
+    elif isinstance(index_mapping, tuple):
+        im_slice = slice(*index_mapping)
+        if im_slice.step is not None:
+            raise NotImplementedError("Slices with step not implemented yet")
+        if im_slice.stop is None or im_slice.start is None:
+            raise NotImplementedError(
+                "Slices must have start and stop defined")
+        return np.arange(im_slice.start, im_slice.stop)
+    elif isinstance(index_mapping, list):
+        return np.array(index_mapping, dtype=np.uint32)
+    else:
+        raise ValueError(f"Invalid type for cached `index_mapping`: "
+                         f"{type(index_mapping)} ({index_mapping})")
+
+
+def get_mapped_object(obj, index_mapping=None):
+    if index_mapping is None:
+        return obj
+    elif isinstance(obj, h5py.Dataset):
+        return MappedHDF5Dataset(
+            obj,
+            mapping_indices=get_mapping_indices(index_mapping))
+    else:
+        raise ValueError(f"No recipe to convert object of type {type(obj)} "
+                         f"({obj}) to an index-mapped object")
dcnum/segm/segm_thresh.py
CHANGED
@@ -16,7 +16,7 @@ class SegmentThresh(CPUSegmenter):
     Parameters
     ----------
     thresh: int
-        grayscale
+        grayscale threshold value for creating the mask image;
         For a background-corrected image, pixels with values below
        this value are considered to be part of the mask.
    """
@@ -25,7 +25,7 @@ class SegmentThresh(CPUSegmenter):
    @staticmethod
    def segment_approach(image, *,
                         thresh: float = -6):
-        """Mask retrieval
+        """Mask retrieval using basic thresholding
 
        Parameters
        ----------
@@ -39,7 +39,7 @@ class SegmentThresh(CPUSegmenter):
        Returns
        -------
        mask: 2d boolean ndarray
-            Mask image for the
+            Mask image for the given index
        """
        assert thresh < 0, "threshold values above zero not supported!"
        return image < thresh