dcnum-0.13.2-py3-none-any.whl → dcnum-0.23.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dcnum might be problematic.
- dcnum/_version.py +2 -2
- dcnum/feat/__init__.py +2 -1
- dcnum/feat/event_extractor_manager_thread.py +67 -33
- dcnum/feat/feat_background/__init__.py +3 -12
- dcnum/feat/feat_background/base.py +80 -65
- dcnum/feat/feat_background/bg_copy.py +31 -0
- dcnum/feat/feat_background/bg_roll_median.py +38 -30
- dcnum/feat/feat_background/bg_sparse_median.py +96 -45
- dcnum/feat/feat_brightness/__init__.py +1 -0
- dcnum/feat/feat_brightness/bright_all.py +41 -6
- dcnum/feat/feat_contour/__init__.py +4 -0
- dcnum/feat/{feat_moments/mt_legacy.py → feat_contour/moments.py} +32 -8
- dcnum/feat/feat_contour/volume.py +174 -0
- dcnum/feat/feat_texture/__init__.py +1 -0
- dcnum/feat/feat_texture/tex_all.py +28 -1
- dcnum/feat/gate.py +92 -70
- dcnum/feat/queue_event_extractor.py +139 -70
- dcnum/logic/__init__.py +5 -0
- dcnum/logic/ctrl.py +794 -0
- dcnum/logic/job.py +184 -0
- dcnum/logic/json_encoder.py +19 -0
- dcnum/meta/__init__.py +1 -0
- dcnum/meta/paths.py +30 -0
- dcnum/meta/ppid.py +66 -9
- dcnum/read/__init__.py +1 -0
- dcnum/read/cache.py +109 -77
- dcnum/read/const.py +6 -4
- dcnum/read/hdf5_data.py +190 -31
- dcnum/read/mapped.py +87 -0
- dcnum/segm/__init__.py +6 -15
- dcnum/segm/segm_thresh.py +7 -14
- dcnum/segm/segm_torch/__init__.py +19 -0
- dcnum/segm/segm_torch/segm_torch_base.py +125 -0
- dcnum/segm/segm_torch/segm_torch_mpo.py +71 -0
- dcnum/segm/segm_torch/segm_torch_sto.py +88 -0
- dcnum/segm/segm_torch/torch_model.py +95 -0
- dcnum/segm/segm_torch/torch_postproc.py +93 -0
- dcnum/segm/segm_torch/torch_preproc.py +114 -0
- dcnum/segm/segmenter.py +245 -96
- dcnum/segm/segmenter_manager_thread.py +39 -28
- dcnum/segm/{segmenter_cpu.py → segmenter_mpo.py} +137 -43
- dcnum/segm/segmenter_sto.py +110 -0
- dcnum/write/__init__.py +3 -1
- dcnum/write/deque_writer_thread.py +15 -5
- dcnum/write/queue_collector_thread.py +14 -17
- dcnum/write/writer.py +225 -55
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/METADATA +4 -2
- dcnum-0.23.1.dist-info/RECORD +55 -0
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/WHEEL +1 -1
- dcnum/feat/feat_moments/__init__.py +0 -3
- dcnum/segm/segmenter_gpu.py +0 -45
- dcnum-0.13.2.dist-info/RECORD +0 -40
- dcnum/feat/{feat_moments/ct_opencv.py → feat_contour/contour.py} +0 -0
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/LICENSE +0 -0
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/top_level.txt +0 -0
dcnum/logic/job.py
ADDED
@@ -0,0 +1,184 @@
import collections
import copy
import inspect
import logging
import multiprocessing as mp
import pathlib
from typing import Dict, Literal
import warnings

from ..feat import QueueEventExtractor
from ..feat.feat_background.base import get_available_background_methods
from ..feat.gate import Gate
from ..meta.ppid import compute_pipeline_hash, DCNUM_PPID_GENERATION
from ..read import HDF5Data
from ..segm import get_available_segmenters


class DCNumPipelineJob:
    def __init__(self,
                 path_in: pathlib.Path | str,
                 path_out: pathlib.Path | str = None,
                 data_code: str = "hdf",
                 data_kwargs: Dict = None,
                 background_code: str = "sparsemed",
                 background_kwargs: Dict = None,
                 segmenter_code: str = "thresh",
                 segmenter_kwargs: Dict = None,
                 feature_code: str = "legacy",
                 feature_kwargs: Dict = None,
                 gate_code: str = "norm",
                 gate_kwargs: Dict = None,
                 basin_strategy: Literal["drain", "tap"] = "drain",
                 no_basins_in_output: bool = None,
                 num_procs: int = None,
                 log_level: int = logging.INFO,
                 debug: bool = False,
                 ):
        """Pipeline job recipe

        Parameters
        ----------
        path_in: pathlib.Path | str
            input data path
        path_out: pathlib.Path | str
            output data path
        data_code: str
            code of input data reader to use
        data_kwargs: dict
            keyword arguments for data reader
        background_code: str
            code of background data computer to use
        background_kwargs: dict
            keyword arguments for background data computer
        segmenter_code: str
            code of segmenter to use
        segmenter_kwargs: dict
            keyword arguments for segmenter
        feature_code: str
            code of feature extractor
        feature_kwargs: dict
            keyword arguments for feature extractor
        gate_code: str
            code for gating/event filtering class
        gate_kwargs: dict
            keyword arguments for gating/event filtering class
        basin_strategy: str
            strategy on how to handle event data. In principle, not all
            events have to be stored in the output file if basins are
            defined, linking back to the original file.
            - You can "drain" all basins, which means that the output file
              will contain all features, but will also be very big.
            - You can "tap" the basins, including the input file, which
              means that the output file will be comparatively small.
        no_basins_in_output: bool
            Deprecated
        num_procs: int
            Number of processes to use
        log_level: int
            Logging level to use.
        debug: bool
            Whether to set the logging level to "DEBUG" and
            use threads instead of processes
        """
        if no_basins_in_output is not None:
            warnings.warn("The `no_basins_in_output` keyword argument is "
                          "deprecated. Please use `basin_strategy` instead.")
            if no_basins_in_output:
                basin_strategy = "drain"
            else:
                basin_strategy = "tap"

        #: initialize keyword arguments for this job
        self.kwargs = {}
        spec = inspect.getfullargspec(DCNumPipelineJob.__init__)
        locs = locals()
        for arg in spec.args:
            if arg == "self":
                continue
            value = locs[arg]
            if value is None and spec.annotations[arg] is Dict:
                value = {}
            self.kwargs[arg] = value
        # Set default pixel size for this job
        if "pixel_size" not in self.kwargs["data_kwargs"]:
            # Extract from input file
            with HDF5Data(path_in) as hd:
                self.kwargs["data_kwargs"]["pixel_size"] = hd.pixel_size
        # Set default output path
        if path_out is None:
            pin = pathlib.Path(path_in)
            path_out = pin.with_name(pin.stem + "_dcn.rtdc")
        # Set logging level to DEBUG in debugging mode
        if self.kwargs["debug"]:
            self.kwargs["log_level"] = logging.DEBUG
        self.kwargs["path_out"] = pathlib.Path(path_out)
        # Set default mask kwargs for segmenter
        self.kwargs["segmenter_kwargs"].setdefault("kwargs_mask", {})
        # Set default number of processes
        if num_procs is None:
            self.kwargs["num_procs"] = mp.cpu_count()

    def __getitem__(self, item):
        return copy.deepcopy(self.kwargs[item])

    def __getstate__(self):
        state = copy.deepcopy(self.kwargs)
        return state

    def __setstate__(self, state):
        self.kwargs.clear()
        self.kwargs.update(copy.deepcopy(state))

    def assert_pp_codes(self):
        """Sanity check of `self.kwargs`"""
        # PPID classes with only one option
        for cls, key in [
            (HDF5Data, "data_code"),
            (Gate, "gate_code"),
            (QueueEventExtractor, "feature_code"),
        ]:
            code_act = self.kwargs[key]
            code_exp = cls.get_ppid_code()
            if code_act != code_exp:
                raise ValueError(f"Invalid code '{code_act}' for '{key}', "
                                 f"expected '{code_exp}'!")
        # PPID classes with multiple options
        for options, key in [
            (get_available_background_methods(), "background_code"),
            (get_available_segmenters(), "segmenter_code"),
        ]:
            code_act = self.kwargs[key]
            if code_act not in options:
                raise ValueError(f"Invalid code '{code_act}' for '{key}', "
                                 f"expected one of '{options}'!")

    def get_ppid(self, ret_hash=False, ret_dict=False):
        self.assert_pp_codes()
        pp_hash_kw = collections.OrderedDict()
        pp_hash_kw["gen_id"] = DCNUM_PPID_GENERATION
        for pp_kw, cls, cls_kw in [
            ("dat_id", HDF5Data, "data_kwargs"),
            ("bg_id",
             get_available_background_methods()[
                 self.kwargs["background_code"]],
             "background_kwargs"),
            ("seg_id",
             get_available_segmenters()[self.kwargs["segmenter_code"]],
             "segmenter_kwargs"),
            ("feat_id", QueueEventExtractor, "feature_kwargs"),
            ("gate_id", Gate, "gate_kwargs"),
        ]:
            pp_hash_kw[pp_kw] = cls.get_ppid_from_ppkw(self.kwargs[cls_kw])

        ppid = "|".join(pp_hash_kw.values())

        ret = [ppid]
        if ret_hash:
            pp_hash = compute_pipeline_hash(**pp_hash_kw)
            ret.append(pp_hash)
        if ret_dict:
            ret.append(pp_hash_kw)
        if len(ret) == 1:
            ret = ret[0]
        return ret
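For orientation, a minimal usage sketch of the new job recipe. It assumes that DCNumPipelineJob is re-exported from dcnum.logic (plausible given the new dcnum/logic/__init__.py above) and that an input file named input.rtdc exists, since instantiation reads the pixel size from the input file:

    from dcnum.logic import DCNumPipelineJob

    # "tap" links basins back to the input file for a small output file;
    # "drain" (the default) stores all features in the output file.
    # Assumes "input.rtdc" exists on disk.
    job = DCNumPipelineJob(path_in="input.rtdc", basin_strategy="tap")
    print(job["path_out"])               # input_dcn.rtdc (a deep copy)
    ppid, pp_hash = job.get_ppid(ret_hash=True)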
dcnum/logic/json_encoder.py
ADDED
@@ -0,0 +1,19 @@
import json
import numbers
import pathlib

import numpy as np


class ExtendedJSONEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, pathlib.Path):
            return str(obj)
        elif isinstance(obj, numbers.Integral):
            return int(obj)
        elif isinstance(obj, np.bool_):
            return bool(obj)
        elif isinstance(obj, slice):
            return "PYTHON-SLICE", (obj.start, obj.stop, obj.step)
        # Let the base class default method raise the TypeError
        return json.JSONEncoder.default(self, obj)
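A short sketch of the encoder in use with json.dumps; the expected output is shown as a comment:

    import json
    import pathlib

    from dcnum.logic.json_encoder import ExtendedJSONEncoder

    data = {"path": pathlib.Path("/tmp/data.rtdc"),
            "roi": slice(10, 20)}
    print(json.dumps(data, cls=ExtendedJSONEncoder))
    # {"path": "/tmp/data.rtdc", "roi": ["PYTHON-SLICE", [10, 20, null]]}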
dcnum/meta/__init__.py
CHANGED
dcnum/meta/paths.py
ADDED
@@ -0,0 +1,30 @@
import pathlib

search_path_registry = {}


def register_search_path(topic: str,
                         search_path: str | pathlib.Path):
    """Register a search path for a given topic

    Search paths are a global solution for organizing the locations
    of resources that are part of an analysis pipeline. For instance,
    if the location of such a file depends on where your pipeline is
    running, you can register multiple search paths and the file will
    be found using :func:`find_file`.
    """
    topic_list = search_path_registry.setdefault(topic, [])
    topic_list.append(pathlib.Path(search_path))


def find_file(topic: str,
              file_name: str):
    """Find a file in the search path for the given topic"""
    search_paths = search_path_registry.get(topic, [])
    for pp in search_paths:
        pf = pp / file_name
        if pf.is_file():
            return pf
    else:
        raise KeyError(f"Could not find {file_name} for {topic} in the "
                       f"registered search paths {search_paths}")
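A minimal sketch of the search-path registry in use; the topic, directories and file name below are made up:

    from dcnum.meta import paths

    # Later registrations are searched after earlier ones.
    paths.register_search_path("models", "/opt/dcnum/models")
    paths.register_search_path("models", "/home/user/models")

    # Returns the first existing match; raises KeyError otherwise.
    model_file = paths.find_file("models", "model-g1.dcnm")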
dcnum/meta/ppid.py
CHANGED
@@ -4,17 +4,38 @@ import collections
 import hashlib
 import inspect
 import pathlib
+from typing import Dict, List, Protocol
+import warnings


 #: Increment this string if there are breaking changes that make
 #: previous pipelines unreproducible.
-DCNUM_PPID_GENERATION = "
+DCNUM_PPID_GENERATION = "10"


-
-
+class ClassWithPPIDCapabilities(Protocol):
+    def get_ppid(self) -> str:
+        """full pipeline identifier for the class (instance method)"""
+        pass
+
+    def get_ppid_code(self) -> str:
+        """string representing the class in the pipeline (classmethod)"""
+        pass
+
+    def get_ppid_from_ppkw(self) -> str:
+        """pipeline identifier from specific pipeline keywords (classmethod)"""
+        pass
+
+    def get_ppkw_from_ppid(self) -> Dict:
+        """class keywords from full pipeline identifier (staticmethod)"""
+        pass
+
+
+def compute_pipeline_hash(*, bg_id, seg_id, feat_id, gate_id,
+                          dat_id="unknown", gen_id=DCNUM_PPID_GENERATION):
     hasher = hashlib.md5()
-    hasher.update("|".join([
+    hasher.update("|".join([
+        gen_id, dat_id, bg_id, seg_id, feat_id, gate_id]).encode())
     pph = hasher.hexdigest()
     return pph

@@ -37,7 +58,10 @@ def convert_to_dtype(value, dtype):
     return value


-def get_class_method_info(class_obj,
+def get_class_method_info(class_obj: ClassWithPPIDCapabilities,
+                          static_kw_methods: List = None,
+                          static_kw_defaults: Dict = None,
+                          ):
     """Return dictionary of class info with static keyword methods docs

     Parameters
@@ -47,10 +71,19 @@ def get_class_method_info(class_obj, static_kw_methods=None):
     static_kw_methods: list of callable
         The methods to inspect; all kwargs-only keyword arguments
         are extracted.
+    static_kw_defaults: dict
+        If a key in this dictionary matches an item in `static_kw_methods`,
+        then these are the default values returned in the "defaults"
+        dictionary. This is used in cases where a base class implements
+        some annotations, but the subclass does not actually use them,
+        e.g. because they are taken from a property, as is the case for
+        the mask postprocessing of segmenter classes.
     """
+    if static_kw_defaults is None:
+        static_kw_defaults = {}
     doc = class_obj.__doc__ or class_obj.__init__.__doc__
     info = {
-        "
+        "code": class_obj.get_ppid_code(),
         "doc": doc,
         "title": doc.split("\n")[0],
     }
@@ -60,19 +93,43 @@ def get_class_method_info(class_obj, static_kw_methods=None):
     for mm in static_kw_methods:
         meth = getattr(class_obj, mm)
         spec = inspect.getfullargspec(meth)
-
+        if mm_defaults := static_kw_defaults.get(mm):
+            defau[mm] = mm_defaults
+        else:
+            defau[mm] = spec.kwonlydefaults or {}
         annot[mm] = spec.annotations
     info["defaults"] = defau
     info["annotations"] = annot
     return info


-def kwargs_to_ppid(cls
-
+def kwargs_to_ppid(cls: ClassWithPPIDCapabilities,
+                   method: str,
+                   kwargs: Dict,
+                   allow_invalid_keys: bool = True):
+    info = get_class_method_info(cls, [method, "__init__"])

     concat_strings = []
     if info["defaults"][method]:
         kwdefaults = info["defaults"][method]
+        kwdefaults_init = info["defaults"]["__init__"]
+        kw_false = (set(kwargs.keys())
+                    - set(kwdefaults.keys())
+                    - set(kwdefaults_init.keys()))
+        if kw_false:
+            # This should not have happened.
+            msg = (f"Invalid kwargs {kw_false} specified for method "
+                   f"'{method}'! Valid kwargs are "
+                   f"{sorted(kwdefaults.keys())}. If you wrote this "
+                   f"segmenter and had to implement `__init__`, make sure "
+                   f"that it accepts all kwonly-arguments its super class "
+                   f"accepts. If this is not the case, you are probably "
+                   f"passing invalid kwargs to the segmenter."
+                   )
+            if allow_invalid_keys:
+                warnings.warn(msg, UserWarning)
+            else:
+                raise KeyError(msg)
         kwannot = info["annotations"][method]
         kws = list(kwdefaults.keys())
         kws_abrv = get_unique_prefix(kws)
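To make the identifier format concrete: get_ppid joins the per-component IDs with "|" and compute_pipeline_hash MD5-hashes that string. A self-contained sketch with made-up component IDs (real ones come from each class's get_ppid_from_ppkw):

    import hashlib

    parts = ["10",               # gen_id (DCNUM_PPID_GENERATION)
             "hdf:p=0.34",       # dat_id, made up
             "sparsemed:k=200",  # bg_id, made up
             "thresh:t=-6",      # seg_id, made up
             "legacy:b=1",       # feat_id, made up
             "norm:o=0"]         # gate_id, made up

    ppid = "|".join(parts)                            # full pipeline ID
    pp_hash = hashlib.md5(ppid.encode()).hexdigest()  # reproducible hash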
dcnum/read/__init__.py
CHANGED
dcnum/read/cache.py
CHANGED
@@ -1,42 +1,68 @@
+import abc
 import collections
 import functools
 import hashlib
 import pathlib
+from typing import Tuple
+import warnings

 import h5py
 import numpy as np


-class HDF5ImageCache:
+class EmptyDatasetWarning(UserWarning):
+    """Used for files that contain no actual data"""
+    pass
+
+
+class BaseImageChunkCache(abc.ABC):
     def __init__(self,
-
+                 shape: Tuple[int],
                  chunk_size: int = 1000,
-                 cache_size: int =
-
-
-
-
-
-        can be time-consuming, because an entire HDF5 chunk has to be
-        loaded, decompressed and from that one image extracted. The
-        `HDF5ImageCache` class caches the chunks from the HDF5 files
-        into memory, making single-image-access very fast.
-        """
-        # TODO:
-        # - adjust chunking to multiples of the chunks in the dataset
-        #   (which will slightly speed up things)
-        chunk_size = min(h5ds.shape[0], chunk_size)
-        self.h5ds = h5ds
-        self.chunk_size = chunk_size
-        self.boolean = boolean
-        self.cache_size = cache_size
+                 cache_size: int = 2,
+                 ):
+        self.shape = shape
+        self._dtype = None
+        chunk_size = min(shape[0], chunk_size)
+        self._len = self.shape[0]
         #: This is a FIFO cache for the chunks
         self.cache = collections.OrderedDict()
-        self.shape = h5ds.shape
         self.image_shape = self.shape[1:]
         self.chunk_shape = (chunk_size,) + self.shape[1:]
-        self.
-        self.
+        self.chunk_size = chunk_size
+        self.cache_size = cache_size
+        self.num_chunks = int(np.ceil(self._len / (self.chunk_size or 1)))
+
+    def __getitem__(self, index):
+        if isinstance(index, (slice, list, np.ndarray)):
+            if isinstance(index, slice):
+                indices = np.arange(index.start or 0,
+                                    index.stop or len(self),
+                                    index.step)
+            else:
+                indices = index
+            array_out = np.empty((len(indices),) + self.image_shape,
+                                 dtype=self.dtype)
+            for ii, idx in enumerate(indices):
+                array_out[ii] = self[idx]
+            return array_out
+        else:
+            chunk_index, sub_index = self._get_chunk_index_for_index(index)
+            return self.get_chunk(chunk_index)[sub_index]
+
+    def __len__(self):
+        return self._len
+
+    @property
+    def dtype(self):
+        """data type of the image data"""
+        if self._dtype is None:
+            self._dtype = self[0].dtype
+        return self._dtype
+
+    @abc.abstractmethod
+    def _get_chunk_data(self, chunk_slice):
+        """Implemented in subclass to obtain actual data"""

     def _get_chunk_index_for_index(self, index):
         if index < 0:
@@ -45,30 +71,19 @@ class HDF5ImageCache:
             raise IndexError(
                 f"Index {index} out of bounds for HDF5ImageCache "
                 f"of size {self._len}")
+        index = int(index)  # convert np.uint64 to int, so we get ints below
         chunk_index = index // self.chunk_size
         sub_index = index % self.chunk_size
         return chunk_index, sub_index

-    def __getitem__(self, index):
-        chunk_index, sub_index = self._get_chunk_index_for_index(index)
-        return self.get_chunk(chunk_index)[sub_index]
-
-    def __len__(self):
-        return self._len
-
     def get_chunk(self, chunk_index):
         """Return one chunk of images"""
         if chunk_index not in self.cache:
-
-                self.chunk_size * (chunk_index + 1)
-            )
-            data = self.h5ds[fslice]
-            if self.boolean:
-                data = np.array(data, dtype=bool)
-            self.cache[chunk_index] = data
-            if len(self.cache) > self.cache_size:
+            if len(self.cache) >= self.cache_size:
                 # Remove the first item
                 self.cache.popitem(last=False)
+            data = self._get_chunk_data(self.get_chunk_slice(chunk_index))
+            self.cache[chunk_index] = data
         return self.cache[chunk_index]

     def get_chunk_size(self, chunk_index):
@@ -81,60 +96,77 @@ class HDF5ImageCache:
             raise IndexError(f"{self} only has {self.num_chunks} chunks!")
         return chunk_size

+    def get_chunk_slice(self, chunk_index):
+        """Return the slice corresponding to the chunk index"""
+        ch_slice = slice(self.chunk_size * chunk_index,
+                         self.chunk_size * (chunk_index + 1)
+                         )
+        return ch_slice
+
     def iter_chunks(self):
-        size = self.h5ds.shape[0]
         index = 0
         chunk = 0
         while True:
             yield chunk
             chunk += 1
             index += self.chunk_size
-            if index >=
+            if index >= self._len:
                 break


-class
+class HDF5ImageCache(BaseImageChunkCache):
     def __init__(self,
-
-
-
-
-
-        self.num_chunks = image.num_chunks
-        self.h5ds = image.h5ds
-        self.shape = image.shape
-        self.chunk_shape = image.chunk_shape
-        #: This is a FILO cache for the corrected image chunks
-        self.cache = collections.OrderedDict()
-        self.cache_size = image.cache_size
+                 h5ds: h5py.Dataset,
+                 chunk_size: int = 1000,
+                 cache_size: int = 2,
+                 boolean: bool = False):
+        """An HDF5 image cache

-
-
-
-
-
+        Deformability cytometry data files commonly contain image stacks
+        that are chunked in various ways. Loading just a single image
+        can be time-consuming, because an entire HDF5 chunk has to be
+        loaded, decompressed and from that one image extracted. The
+        `HDF5ImageCache` class caches the chunks from the HDF5 files
+        into memory, making single-image-access very fast.
+        """
+        super(HDF5ImageCache, self).__init__(
+            shape=h5ds.shape,
+            chunk_size=chunk_size,
+            cache_size=cache_size)
+        # TODO:
+        # - adjust chunking to multiples of the chunks in the dataset
+        #   (which might slightly speed up things)
+        self.h5ds = h5ds
+        self.boolean = boolean

-
-
-
+        if self._len == 0:
+            warnings.warn(f"Input image '{h5ds.name}' in "
+                          f"file {h5ds.file.filename} has zero length",
+                          EmptyDatasetWarning)

-    def
-
+    def _get_chunk_data(self, chunk_slice):
+        data = self.h5ds[chunk_slice]
+        if self.boolean:
+            data = np.array(data, dtype=bool)
+        return data

-    def get_chunk(self, chunk_index):
-        if chunk_index not in self.cache:
-            data = np.array(
-                self.image.get_chunk(chunk_index), dtype=np.int16) \
-                - self.image_bg.get_chunk(chunk_index)
-            self.cache[chunk_index] = data
-            if len(self.cache) > self.cache_size:
-                # Remove the first item
-                self.cache.popitem(last=False)
-            return self.cache[chunk_index]

-
-
+class ImageCorrCache(BaseImageChunkCache):
+    def __init__(self,
+                 image: HDF5ImageCache,
+                 image_bg: HDF5ImageCache):
+        super(ImageCorrCache, self).__init__(
+            shape=image.shape,
+            chunk_size=image.chunk_size,
+            cache_size=image.cache_size)
+        self.image = image
+        self.image_bg = image_bg
+
+    def _get_chunk_data(self, chunk_slice):
+        data = np.array(
+            self.image._get_chunk_data(chunk_slice), dtype=np.int16) \
+            - self.image_bg._get_chunk_data(chunk_slice)
+        return data


 @functools.cache
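A usage sketch of the refactored caches. The HDF5 dataset paths (events/image, events/image_bg) are assumptions about the file layout:

    import h5py

    from dcnum.read.cache import HDF5ImageCache, ImageCorrCache

    with h5py.File("input.rtdc") as h5:  # file and dataset names assumed
        image = HDF5ImageCache(h5["events/image"])
        image_bg = HDF5ImageCache(h5["events/image_bg"])
        corr = ImageCorrCache(image, image_bg)  # background-corrected
        frame = corr[0]      # single image, served from a cached chunk
        stack = corr[10:20]  # slices/lists are assembled image by image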
dcnum/read/const.py
CHANGED
@@ -1,15 +1,17 @@
-#: Scalar features that apply to all events in a frame
+#: Scalar features that apply to all events in a frame and which are
+#: not computed for individual events.
 PROTECTED_FEATURES = [
-    "
+    "bg_off",
     "flow_rate",
     "frame",
     "g_force",
-    "index_online",
     "pressure",
     "temp",
     "temp_amb",
-    "time"
+    "time",
 ]

+# User-defined features may be anything, but if the user needs something
+# very specific for the pipeline, having them protected is a nice feature.
 for ii in range(10):
     PROTECTED_FEATURES.append(f"userdef{ii}")