dcnum-0.13.2-py3-none-any.whl → dcnum-0.23.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dcnum/_version.py +2 -2
- dcnum/feat/__init__.py +2 -1
- dcnum/feat/event_extractor_manager_thread.py +67 -33
- dcnum/feat/feat_background/__init__.py +3 -12
- dcnum/feat/feat_background/base.py +80 -65
- dcnum/feat/feat_background/bg_copy.py +31 -0
- dcnum/feat/feat_background/bg_roll_median.py +38 -30
- dcnum/feat/feat_background/bg_sparse_median.py +96 -45
- dcnum/feat/feat_brightness/__init__.py +1 -0
- dcnum/feat/feat_brightness/bright_all.py +41 -6
- dcnum/feat/feat_contour/__init__.py +4 -0
- dcnum/feat/{feat_moments/mt_legacy.py → feat_contour/moments.py} +32 -8
- dcnum/feat/feat_contour/volume.py +174 -0
- dcnum/feat/feat_texture/__init__.py +1 -0
- dcnum/feat/feat_texture/tex_all.py +28 -1
- dcnum/feat/gate.py +92 -70
- dcnum/feat/queue_event_extractor.py +139 -70
- dcnum/logic/__init__.py +5 -0
- dcnum/logic/ctrl.py +794 -0
- dcnum/logic/job.py +184 -0
- dcnum/logic/json_encoder.py +19 -0
- dcnum/meta/__init__.py +1 -0
- dcnum/meta/paths.py +30 -0
- dcnum/meta/ppid.py +66 -9
- dcnum/read/__init__.py +1 -0
- dcnum/read/cache.py +109 -77
- dcnum/read/const.py +6 -4
- dcnum/read/hdf5_data.py +190 -31
- dcnum/read/mapped.py +87 -0
- dcnum/segm/__init__.py +6 -15
- dcnum/segm/segm_thresh.py +7 -14
- dcnum/segm/segm_torch/__init__.py +19 -0
- dcnum/segm/segm_torch/segm_torch_base.py +125 -0
- dcnum/segm/segm_torch/segm_torch_mpo.py +71 -0
- dcnum/segm/segm_torch/segm_torch_sto.py +88 -0
- dcnum/segm/segm_torch/torch_model.py +95 -0
- dcnum/segm/segm_torch/torch_postproc.py +93 -0
- dcnum/segm/segm_torch/torch_preproc.py +114 -0
- dcnum/segm/segmenter.py +245 -96
- dcnum/segm/segmenter_manager_thread.py +39 -28
- dcnum/segm/{segmenter_cpu.py → segmenter_mpo.py} +137 -43
- dcnum/segm/segmenter_sto.py +110 -0
- dcnum/write/__init__.py +3 -1
- dcnum/write/deque_writer_thread.py +15 -5
- dcnum/write/queue_collector_thread.py +14 -17
- dcnum/write/writer.py +225 -55
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/METADATA +4 -2
- dcnum-0.23.1.dist-info/RECORD +55 -0
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/WHEEL +1 -1
- dcnum/feat/feat_moments/__init__.py +0 -3
- dcnum/segm/segmenter_gpu.py +0 -45
- dcnum-0.13.2.dist-info/RECORD +0 -40
- /dcnum/feat/{feat_moments/ct_opencv.py → feat_contour/contour.py} +0 -0
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/LICENSE +0 -0
- {dcnum-0.13.2.dist-info → dcnum-0.23.1.dist-info}/top_level.txt +0 -0
dcnum/read/hdf5_data.py
CHANGED
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+import hashlib
 import io
 import json
+import numbers
 import pathlib
 import tempfile
 from typing import Dict, BinaryIO, List
@@ -13,6 +15,7 @@ import numpy as np
 
 from .cache import HDF5ImageCache, ImageCorrCache, md5sum
 from .const import PROTECTED_FEATURES
+from .mapped import get_mapped_object, get_mapping_indices
 
 
 class HDF5Data:
@@ -25,13 +28,48 @@ class HDF5Data:
                  basins: List[Dict[List[str] | str]] = None,
                  logs: Dict[List[str]] = None,
                  tables: Dict[np.ndarray] = None,
-                 image_cache_size: int =
+                 image_cache_size: int = 2,
+                 index_mapping: int | slice | List | np.ndarray = None,
                  ):
+        """
+
+        Parameters
+        ----------
+        path:
+            path to data file
+        pixel_size:
+            pixel size in µm
+        md5_5m:
+            MD5 sum of the first 5 MiB; computed if not provided
+        meta:
+            metadata dictionary; extracted from HDF5 attributes
+            if not provided
+        basins:
+            list of basin dictionaries; extracted from HDF5 attributes
+            if not provided
+        logs:
+            dictionary of logs; extracted from HDF5 attributes
+            if not provided
+        tables:
+            dictionary of tables; extracted from HDF5 attributes
+            if not provided
+        image_cache_size:
+            size of the image cache to use when accessing image data
+        index_mapping:
+            select only a subset of input events, transparently reducing the
+            size of the dataset, possible data types are
+            - int `N`: use the first `N` events
+            - slice: use the events defined by a slice
+            - list: list of integers specifying the event indices to use
+            Numpy indexing rules apply. E.g. to only process the first
+            100 events, set this to `100` or `slice(0, 100)`.
+        """
         # Init is in __setstate__ so we can pickle this class
         # and use it for multiprocessing.
         if isinstance(path, h5py.File):
             self.h5 = path
             path = path.filename
+
         self.__setstate__({"path": path,
                            "pixel_size": pixel_size,
                            "md5_5m": md5_5m,
@@ -40,6 +78,7 @@ class HDF5Data:
                            "logs": logs,
                            "tables": tables,
                            "image_cache_size": image_cache_size,
+                           "index_mapping": index_mapping,
                            })
 
     def __contains__(self, item):
@@ -53,24 +92,34 @@ class HDF5Data:
 
     def __getitem__(self, feat):
         if feat in ["image", "image_bg", "mask"]:
-
+            data = self.get_image_cache(feat)  # already index-mapped
+            if data is None:
+                raise KeyError(f"Feature '{feat}' not found in {self}!")
+            else:
+                return data
         elif feat in self._cache_scalar:  # check for scalar cached
             return self._cache_scalar[feat]
         elif (feat in self.h5["events"]
               and len(self.h5["events"][feat].shape) == 1):  # cache scalar
-            self.
+            if self.index_mapping is None:
+                idx_map = slice(None)  # no mapping indices, just slice
+            else:
+                idx_map = get_mapping_indices(self.index_mapping)
+            self._cache_scalar[feat] = self.h5["events"][feat][idx_map]
             return self._cache_scalar[feat]
         else:
             if feat in self.h5["events"]:
                 # Not cached (possibly slow)
                 warnings.warn(f"Feature {feat} not cached (possibly slow)")
-                return
+                return get_mapped_object(
+                    obj=self.h5["events"][feat],
+                    index_mapping=self.index_mapping)
             else:
                 # Check the basins
                 for idx in range(len(self.basins)):
                     bn, bn_features = self.get_basin_data(idx)
                     if bn_features and feat in bn_features:
-                        return bn[feat]
+                        return bn[feat]  # already index-mapped
                 # If we got here, then the feature data does not exist.
                 raise KeyError(f"Feature '{feat}' not found in {self}!")
 
@@ -82,13 +131,14 @@ class HDF5Data:
                 "logs": self.logs,
                 "tables": self.tables,
                 "basins": self.basins,
-                "image_cache_size": self.image.cache_size
+                "image_cache_size": self.image.cache_size,
+                "index_mapping": self.index_mapping,
                 }
 
     def __setstate__(self, state):
         # Make sure these properties exist (we rely on __init__, because
         # we want this class to be pickable and __init__ is not called by
-        # `pickle.load
+        # `pickle.load`).
         # Cached properties
         self._feats = None
         self._keys = None
@@ -112,7 +162,7 @@ class HDF5Data:
         if self.md5_5m is None:
             if isinstance(self.path, pathlib.Path):
                 # 5MB md5sum of input file
-                self.md5_5m = md5sum(self.path, count=80)
+                self.md5_5m = md5sum(self.path, blocksize=65536, count=80)
             else:
                 self.md5_5m = str(uuid.uuid4()).replace("-", "")
         self.meta = state["meta"]
@@ -136,46 +186,44 @@ class HDF5Data:
             if isinstance(self.meta[key], bytes):
                 self.meta[key] = self.meta[key].decode("utf-8")
         # logs
-        for key in h5.get("logs",
+        for key in sorted(h5.get("logs", {}).keys()):
             alog = list(h5["logs"][key])
             if alog:
                 if isinstance(alog[0], bytes):
                     alog = [ll.decode("utf") for ll in alog]
                 self.logs[key] = alog
         # tables
-        for tab in h5.get("tables",
+        for tab in sorted(h5.get("tables", {}).keys()):
             tabdict = {}
             for tkey in h5["tables"][tab].dtype.fields.keys():
                 tabdict[tkey] = \
                     np.array(h5["tables"][tab][tkey]).reshape(-1)
             self.tables[tab] = tabdict
         # basins
-
+        basins = []
+        for bnkey in h5.get("basins", {}).keys():
             bn_data = "\n".join(
                 [s.decode() for s in h5["basins"][bnkey][:].tolist()])
             bn_dict = json.loads(bn_data)
-
+            basins.append(bn_dict)
+        self.basins = sorted(basins, key=lambda x: x["name"])
 
         if state["pixel_size"] is not None:
             self.pixel_size = state["pixel_size"]
-        else:
-            # Set known pixel size if possible
-            did = self.meta.get("setup:identifier", "EMPTY")
-            if (did.startswith("RC-")
-                    and (self.pixel_size < 0.255 or self.pixel_size > 0.275)):
-                warnings.warn(
-                    f"Correcting for invalid pixel size in '{self.path}'!")
-                # Set default pixel size for Rivercyte devices
-                self.pixel_size = 0.2645
 
         self.image_cache_size = state["image_cache_size"]
 
+        self.index_mapping = state["index_mapping"]
+
         if self.h5 is None:
             self.h5 = h5py.File(self.path, libver="latest")
 
     def __len__(self):
         if self._len is None:
-            self.
+            if self.index_mapping is not None:
+                self._len = get_mapping_indices(self.index_mapping).size
+            else:
+                self._len = self.h5.attrs["experiment:event count"]
         return self._len
 
     @property
@@ -217,7 +265,10 @@ class HDF5Data:
         return self.meta.get("imaging:pixel size", 0)
 
     @pixel_size.setter
-    def pixel_size(self, pixel_size):
+    def pixel_size(self, pixel_size: float):
+        # Reduce pixel_size accuracy to 8 digits after the point to
+        # enforce pipeline reproducibility (see get_ppid_from_ppkw).
+        pixel_size = float(f"{pixel_size:.8f}")
         self.meta["imaging:pixel size"] = pixel_size
 
     @property
@@ -230,7 +281,7 @@ class HDF5Data:
         """
         if self._feats is None:
             feats = []
-            for feat in self.
+            for feat in self.keys():
                 if feat in PROTECTED_FEATURES:
                     feats.append(feat)
             self._feats = feats
@@ -245,6 +296,79 @@ class HDF5Data:
         self._basin_data.clear()
         self.h5.close()
 
+    def get_ppid(self):
+        return self.get_ppid_from_ppkw(
+            {"pixel_size": self.pixel_size,
+             "index_mapping": self.index_mapping})
+
+    @classmethod
+    def get_ppid_code(cls):
+        return "hdf"
+
+    @classmethod
+    def get_ppid_from_ppkw(cls, kwargs):
+        # Data does not really fit into the PPID scheme we use for the rest
+        # of the pipeline. This implementation here is custom.
+        code = cls.get_ppid_code()
+        # pixel size
+        ppid_ps = f"{kwargs['pixel_size']:.8f}".rstrip("0")
+        # index mapping
+        ppid_im = cls.get_ppid_index_mapping(kwargs.get("index_mapping", None))
+        kwid = "^".join([f"p={ppid_ps}", f"i={ppid_im}"])
+        return ":".join([code, kwid])
+
+    @staticmethod
+    def get_ppid_index_mapping(index_mapping):
+        """Return the pipeline identifier part for index mapping"""
+        im = index_mapping
+        if im is None:
+            dim = "0"
+        elif isinstance(im, numbers.Integral):
+            dim = f"{im}"
+        elif isinstance(im, slice):
+            dim = (f"{im.start if im.start is not None else 'n'}"
+                   + f"-{im.stop if im.stop is not None else 'n'}"
+                   + f"-{im.step if im.step is not None else 'n'}"
+                   )
+        elif isinstance(im, (list, np.ndarray)):
+            idhash = hashlib.md5(
+                np.array(im, dtype=np.uint32).tobytes()).hexdigest()
+            dim = f"h-{idhash[:8]}"
+        else:
+            dim = "unknown"
+        return dim
+
+    @staticmethod
+    def get_ppkw_from_ppid(dat_ppid):
+        # Data does not fit in the PPID scheme we use, but we still
+        # would like to pass pixel_size to __init__ if we need it.
+        code, kwargs_str = dat_ppid.split(":")
+        if code != HDF5Data.get_ppid_code():
+            raise ValueError(f"Could not find data method '{code}'!")
+        kwitems = kwargs_str.split("^")
+        kwargs = {}
+        for item in kwitems:
+            var, val = item.split("=")
+            if var == "p":
+                kwargs["pixel_size"] = float(val)
+            elif var == "i":
+                if val.startswith("h-") or val == "unknown":
+                    raise ValueError(f"Cannot invert index mapping {val}")
+                elif val == "0":
+                    kwargs["index_mapping"] = None
+                elif val.count("-"):
+                    start, stop, step = val.split("-")
+                    kwargs["index_mapping"] = slice(
+                        None if start == "n" else int(start),
+                        None if stop == "n" else int(stop),
+                        None if step == "n" else int(step)
+                    )
+                else:
+                    kwargs["index_mapping"] = int(val)
+            else:
+                raise ValueError(f"Invalid parameter '{var}'!")
+        return kwargs
+
     def get_basin_data(self, index):
         """Return HDF5Data info for a basin index in `self.basins`
 
@@ -273,9 +397,34 @@ class HDF5Data:
         if path is None:
             self._basin_data[index] = (None, None)
         else:
-
+            feat_basinmap = bn_dict.get("mapping", None)
+            if feat_basinmap is None:
+                # This is NOT a mapped basin.
+                index_mapping = self.index_mapping
+            else:
+                # This is a mapped basin. Create an indexing list.
+                if self.index_mapping is None:
+                    # The current dataset is not mapped.
+                    basinmap_idx = slice(None)
+                else:
+                    # The current dataset is also mapped.
+                    basinmap_idx = get_mapping_indices(self.index_mapping)
+                basinmap = self.h5[f"events/{feat_basinmap}"]
+                index_mapping = basinmap[basinmap_idx]
+
+            h5dat = HDF5Data(path, index_mapping=index_mapping)
             features = bn_dict.get("features")
             if features is None:
+                # Only get the features from the actual HDF5 file.
+                # If this file has basins as well, the basin metadata
+                # should have been copied over to the parent file. This
+                # makes things a little cleaner, because basins are not
+                # nested, but all basins are available in the top file.
+                # See :func:`write.store_metadata` for copying metadata
+                # between files.
+                # The writer can still specify "features" in the basin
+                # metadata, then these basins are indeed nested, and
+                # we consider that ok as well.
                 features = sorted(h5dat.h5["events"].keys())
             self._basin_data[index] = (h5dat, features)
         return self._basin_data[index]
@@ -288,20 +437,27 @@ class HDF5Data:
         if feat not in self._image_cache:
             if f"events/{feat}" in self.h5:
                 ds = self.h5[f"events/{feat}"]
+                idx_map = self.index_mapping
             else:
+                idx_map = None
                 # search all basins
                 for idx in range(len(self.basins)):
-
+                    bn_dat, features = self.get_basin_data(idx)
                     if features is not None:
                         if feat in features:
-
+                            # HDF5 dataset
+                            ds = bn_dat.h5[f"events/{feat}"]
+                            # Index mapping (taken from the basins which
+                            # already includes the mapping from the current
+                            # instance).
+                            idx_map = bn_dat.index_mapping
                             break
                 else:
                     ds = None
 
             if ds is not None:
                 image = HDF5ImageCache(
-                    h5ds=ds,
+                    h5ds=get_mapped_object(obj=ds, index_mapping=idx_map),
                     cache_size=self.image_cache_size,
                     boolean=feat == "mask")
             else:
@@ -337,8 +493,8 @@ def concatenated_hdf5_data(paths: List[pathlib.Path],
     path_out:
         If `None`, then the dataset is created in memory. If `True`
        (default), create a file on disk. If a pathlib.Path is specified,
-        the dataset is written to that file. Note that
-        are
+        the dataset is written to that file. Note that datasets in memory
+        are likely not pickable (so don't use them for multiprocessing).
     compute_frame:
         Whether to compute the "events/frame" feature, taking the frame
         data from the input files and properly incrementing them along
@@ -351,6 +507,7 @@ def concatenated_hdf5_data(paths: List[pathlib.Path],
     - If one of the input files does not contain a feature from the first
      input `paths`, then a `ValueError` is raised. Use the `features`
      argument to specify which features you need instead.
+    - Basins are not considered.
     """
    h5kwargs = {"mode": "w", "libver": "latest"}
    if isinstance(path_out, (pathlib.Path, str)):
@@ -386,7 +543,8 @@ def concatenated_hdf5_data(paths: List[pathlib.Path],
        # get metadata
        if ii == 0:
            meta = dict(h5.attrs)
-
+            if not features:
+                features = featsi
        # make sure number of features are consistent
        if not set(features) <= set(featsi):
            raise ValueError(
@@ -396,6 +554,7 @@ def concatenated_hdf5_data(paths: List[pathlib.Path],
            if not isinstance(h5["events"][feat], h5py.Dataset):
                warnings.warn(
                    f"Ignoring {feat}; not implemented yet!")
+                continue
            if feat in ["frame", "time"]:
                continue
            shapes.setdefault(feat, []).append(
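To make the new `index_mapping` parameter concrete, here is a minimal usage sketch. It assumes `HDF5Data` is importable from `dcnum.read` and that "example.rtdc" is a placeholder input file with at least 100 events and a scalar "deform" feature; neither name comes from this diff.

    from dcnum.read import HDF5Data

    # Use only every second event among the first 100 events.
    data = HDF5Data("example.rtdc", index_mapping=slice(0, 100, 2))
    assert len(data) == 50           # __len__ reflects the mapping
    deform = data["deform"]          # scalar features come back index-mapped

    # The data PPID now encodes pixel size and index mapping, e.g.
    # something like "hdf:p=0.2645^i=0-100-2" (pixel size comes from the file).
    print(data.get_ppid())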
dcnum/read/mapped.py
ADDED
@@ -0,0 +1,87 @@
+import functools
+
+import numbers
+
+import h5py
+import numpy as np
+
+
+class MappedHDF5Dataset:
+    def __init__(self,
+                 h5ds: h5py.Dataset,
+                 mapping_indices: np.ndarray):
+        """An index-mapped object for accessing an HDF5 dataset
+
+        Parameters
+        ----------
+        h5ds: h5py.Dataset
+            HDF5 dataset from which to map data
+        mapping_indices: np.ndarray
+            numpy indexing array containing integer indices
+        """
+        self.h5ds = h5ds
+        self.mapping_indices = mapping_indices
+        self.shape = (mapping_indices.size,) + h5ds.shape[1:]
+
+    def __getitem__(self, idx):
+        if isinstance(idx, numbers.Integral):
+            return self.h5ds[self.mapping_indices[idx]]
+        else:
+            idx_mapped = self.mapping_indices[idx]
+            return self.h5ds[idx_mapped]
+
+
+def get_mapping_indices(
+        index_mapping: numbers.Integral | slice | list | np.ndarray
+        ):
+    """Return integer numpy array with mapping indices for a range
+
+    Parameters
+    ----------
+    index_mapping: numbers.Integral | slice | list | np.ndarray
+        Several options you have here:
+        - integer: results in np.arrange(integer)
+        - slice: results in np.arrange(slice.start, slice.stop, slice.step)
+        - list or np.ndarray: returns the input as unit32 array
+    """
+    if isinstance(index_mapping, numbers.Integral):
+        return _get_mapping_indices_cached(index_mapping)
+    elif isinstance(index_mapping, slice):
+        return _get_mapping_indices_cached(
+            (index_mapping.start, index_mapping.stop, index_mapping.step))
+    elif isinstance(index_mapping, (np.ndarray, list)):
+        return np.array(index_mapping, dtype=np.uint32)
+    else:
+        raise ValueError(f"Invalid type for `index_mapping`: "
+                         f"{type(index_mapping)} ({index_mapping})")
+
+
+@functools.lru_cache(maxsize=100)
+def _get_mapping_indices_cached(
+        index_mapping: numbers.Integral | tuple
+        ):
+    if isinstance(index_mapping, numbers.Integral):
+        return np.arange(index_mapping)
+    elif isinstance(index_mapping, tuple):
+        im_slice = slice(*index_mapping)
+        if im_slice.stop is None or im_slice.start is None:
+            raise NotImplementedError(
+                "Slices must have start and stop defined")
+        return np.arange(im_slice.start, im_slice.stop, im_slice.step)
+    elif isinstance(index_mapping, list):
+        return np.array(index_mapping, dtype=np.uint32)
+    else:
+        raise ValueError(f"Invalid type for cached `index_mapping`: "
+                         f"{type(index_mapping)} ({index_mapping})")
+
+
+def get_mapped_object(obj, index_mapping=None):
+    if index_mapping is None:
+        return obj
+    elif isinstance(obj, h5py.Dataset):
+        return MappedHDF5Dataset(
+            obj,
+            mapping_indices=get_mapping_indices(index_mapping))
+    else:
+        raise ValueError(f"No recipe to convert object of type {type(obj)} "
+                         f"({obj}) to an index-mapped object")
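A short, self-contained sketch of how the new mapping helpers behave, using an in-memory HDF5 file purely for illustration (the file and dataset names are made up):

    import h5py
    import numpy as np
    from dcnum.read.mapped import get_mapped_object, get_mapping_indices

    # All supported index_mapping types are normalized to integer arrays.
    assert np.array_equal(get_mapping_indices(3), [0, 1, 2])
    assert np.array_equal(get_mapping_indices(slice(2, 6, 2)), [2, 4])
    assert np.array_equal(get_mapping_indices([5, 1, 7]), [5, 1, 7])

    with h5py.File("demo.h5", "w", driver="core", backing_store=False) as h5:
        ds = h5.create_dataset("events/deform", data=np.arange(10.0))
        # MappedHDF5Dataset view of events 2, 4, and 6 only.
        view = get_mapped_object(ds, index_mapping=slice(2, 8, 2))
        assert view.shape == (3,)
        assert view[1] == 4.0  # element 1 of the view maps to file index 4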
dcnum/segm/__init__.py
CHANGED
@@ -1,18 +1,9 @@
 # flake8: noqa: F401
-import
-
-
-from .
-from .
+from .segmenter import (
+    Segmenter, SegmenterNotApplicableError, get_available_segmenters
+)
+from .segmenter_mpo import MPOSegmenter
+from .segmenter_sto import STOSegmenter
 from .segmenter_manager_thread import SegmenterManagerThread
 from . import segm_thresh
-
-
-@functools.cache
-def get_available_segmenters():
-    """Return dictionary of available segmenters"""
-    segmenters = {}
-    for scls in Segmenter.__subclasses__():
-        for cls in scls.__subclasses__():
-            segmenters[cls.key()] = cls
-    return segmenters
+from . import segm_torch
dcnum/segm/segm_thresh.py
CHANGED
@@ -1,26 +1,19 @@
-from .
+from .segmenter_mpo import MPOSegmenter
 
 
-class SegmentThresh(
+class SegmentThresh(MPOSegmenter):
     mask_postprocessing = True
     mask_default_kwargs = {
         "clear_border": True,
         "fill_holes": True,
         "closing_disk": 2,
     }
-
-    def __init__(self, thresh=-6, *args, **kwargs):
-        """Simple image thresholding segmentation
-
-        Parameters
-        ----------
-        """
-        super(SegmentThresh, self).__init__(thresh=thresh, *args, **kwargs)
+    requires_background_correction = True
 
     @staticmethod
-    def
-
-    """Mask retrieval
+    def segment_algorithm(image, *,
+                          thresh: float = -6):
+        """Mask retrieval using basic thresholding
 
         Parameters
         ----------
@@ -34,7 +27,7 @@ class SegmentThresh(CPUSegmenter):
         Returns
         -------
         mask: 2d boolean ndarray
-            Mask image for the
+            Mask image for the given index
         """
         assert thresh < 0, "threshold values above zero not supported!"
         return image < thresh
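Because `segment_algorithm` is now a plain static method with the threshold as a keyword argument, it can be exercised directly on a background-corrected image; a minimal sketch (the toy array is not from the diff):

    import numpy as np
    from dcnum.segm.segm_thresh import SegmentThresh

    # Background-corrected image: cells appear darker than the background,
    # hence the negative default threshold.
    image_corr = np.array([[0, -1,  0],
                           [-1, -9, -2],
                           [0, -2,  0]])
    mask = SegmentThresh.segment_algorithm(image_corr, thresh=-6)
    assert mask[1, 1] and mask.sum() == 1  # only the -9 pixel is below -6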
dcnum/segm/segm_torch/__init__.py
ADDED
@@ -0,0 +1,19 @@
+import importlib
+
+try:
+    torch = importlib.import_module("torch")
+    req_maj = 2
+    req_min = 3
+    ver_tuple = torch.__version__.split(".")
+    act_maj = int(ver_tuple[0])
+    act_min = int(ver_tuple[1])
+    if act_maj < req_maj or (act_maj == req_maj and act_min < req_min):
+        raise ValueError(f"Your PyTorch version {act_maj}.{act_min} is not "
+                         f"supported, please update to at least "
+                         f"{req_maj}.{req_min}")
+except ImportError:
+    pass
+else:
+    from .segm_torch_mpo import SegmentTorchMPO  # noqa: F401
+    if torch.cuda.is_available():
+        from .segm_torch_sto import SegmentTorchSTO  # noqa: F401