dcnum 0.17.0-py3-none-any.whl → 0.23.2-py3-none-any.whl

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

This version of dcnum has been flagged as potentially problematic.

Files changed (49)
  1. dcnum/_version.py +2 -2
  2. dcnum/feat/__init__.py +1 -1
  3. dcnum/feat/event_extractor_manager_thread.py +34 -25
  4. dcnum/feat/feat_background/base.py +22 -26
  5. dcnum/feat/feat_background/bg_copy.py +18 -12
  6. dcnum/feat/feat_background/bg_roll_median.py +20 -10
  7. dcnum/feat/feat_background/bg_sparse_median.py +55 -7
  8. dcnum/feat/feat_brightness/bright_all.py +41 -6
  9. dcnum/feat/feat_contour/__init__.py +4 -0
  10. dcnum/feat/{feat_moments/mt_legacy.py → feat_contour/moments.py} +32 -8
  11. dcnum/feat/feat_contour/volume.py +174 -0
  12. dcnum/feat/feat_texture/tex_all.py +28 -1
  13. dcnum/feat/gate.py +2 -2
  14. dcnum/feat/queue_event_extractor.py +30 -9
  15. dcnum/logic/ctrl.py +222 -48
  16. dcnum/logic/job.py +85 -2
  17. dcnum/logic/json_encoder.py +2 -0
  18. dcnum/meta/ppid.py +17 -3
  19. dcnum/read/__init__.py +1 -0
  20. dcnum/read/cache.py +100 -78
  21. dcnum/read/const.py +6 -4
  22. dcnum/read/hdf5_data.py +146 -23
  23. dcnum/read/mapped.py +87 -0
  24. dcnum/segm/__init__.py +6 -3
  25. dcnum/segm/segm_thresh.py +6 -18
  26. dcnum/segm/segm_torch/__init__.py +23 -0
  27. dcnum/segm/segm_torch/segm_torch_base.py +125 -0
  28. dcnum/segm/segm_torch/segm_torch_mpo.py +71 -0
  29. dcnum/segm/segm_torch/segm_torch_sto.py +88 -0
  30. dcnum/segm/segm_torch/torch_model.py +95 -0
  31. dcnum/segm/segm_torch/torch_postproc.py +93 -0
  32. dcnum/segm/segm_torch/torch_preproc.py +114 -0
  33. dcnum/segm/segmenter.py +181 -80
  34. dcnum/segm/segmenter_manager_thread.py +38 -30
  35. dcnum/segm/{segmenter_cpu.py → segmenter_mpo.py} +116 -44
  36. dcnum/segm/segmenter_sto.py +110 -0
  37. dcnum/write/__init__.py +2 -1
  38. dcnum/write/deque_writer_thread.py +9 -1
  39. dcnum/write/queue_collector_thread.py +8 -14
  40. dcnum/write/writer.py +128 -5
  41. {dcnum-0.17.0.dist-info → dcnum-0.23.2.dist-info}/METADATA +4 -2
  42. dcnum-0.23.2.dist-info/RECORD +55 -0
  43. {dcnum-0.17.0.dist-info → dcnum-0.23.2.dist-info}/WHEEL +1 -1
  44. dcnum/feat/feat_moments/__init__.py +0 -4
  45. dcnum/segm/segmenter_gpu.py +0 -64
  46. dcnum-0.17.0.dist-info/RECORD +0 -46
  47. /dcnum/feat/{feat_moments/ct_opencv.py → feat_contour/contour.py} +0 -0
  48. {dcnum-0.17.0.dist-info → dcnum-0.23.2.dist-info}/LICENSE +0 -0
  49. {dcnum-0.17.0.dist-info → dcnum-0.23.2.dist-info}/top_level.txt +0 -0
dcnum/read/cache.py CHANGED
@@ -1,7 +1,9 @@
+import abc
 import collections
 import functools
 import hashlib
 import pathlib
+from typing import Tuple
 import warnings
 
 import h5py
@@ -13,41 +15,55 @@ class EmptyDatasetWarning(UserWarning):
     pass
 
 
-class HDF5ImageCache:
+class BaseImageChunkCache(abc.ABC):
     def __init__(self,
-                 h5ds: h5py.Dataset,
+                 shape: Tuple[int],
                  chunk_size: int = 1000,
                  cache_size: int = 2,
-                 boolean: bool = False):
-        """An HDF5 image cache
-
-        Deformability cytometry data files commonly contain image stacks
-        that are chunked in various ways. Loading just a single image
-        can be time-consuming, because an entire HDF5 chunk has to be
-        loaded, decompressed and from that one image extracted. The
-        `HDF5ImageCache` class caches the chunks from the HDF5 files
-        into memory, making single-image-access very fast.
-        """
-        self.shape = h5ds.shape
+                 ):
+        self.shape = shape
+        self._dtype = None
+        chunk_size = min(shape[0], chunk_size)
         self._len = self.shape[0]
-        if self._len == 0:
-            warnings.warn(f"Input image '{h5ds.name}' in "
-                          f"file {h5ds.file.filename} has zero length",
-                          EmptyDatasetWarning)
-        # TODO:
-        # - adjust chunking to multiples of the chunks in the dataset
-        #   (which might slightly speed up things)
-        chunk_size = min(h5ds.shape[0], chunk_size)
-        self.h5ds = h5ds
-        self.chunk_size = chunk_size
-        self.boolean = boolean
-        self.cache_size = cache_size
         #: This is a FILO cache for the chunks
         self.cache = collections.OrderedDict()
         self.image_shape = self.shape[1:]
         self.chunk_shape = (chunk_size,) + self.shape[1:]
+        self.chunk_size = chunk_size
+        self.cache_size = cache_size
         self.num_chunks = int(np.ceil(self._len / (self.chunk_size or 1)))
 
+    def __getitem__(self, index):
+        if isinstance(index, (slice, list, np.ndarray)):
+            if isinstance(index, slice):
+                indices = np.arange(index.start or 0,
+                                    index.stop or len(self),
+                                    index.step)
+            else:
+                indices = index
+            array_out = np.empty((len(indices),) + self.image_shape,
+                                 dtype=self.dtype)
+            for ii, idx in enumerate(indices):
+                array_out[ii] = self[idx]
+            return array_out
+        else:
+            chunk_index, sub_index = self._get_chunk_index_for_index(index)
+            return self.get_chunk(chunk_index)[sub_index]
+
+    def __len__(self):
+        return self._len
+
+    @property
+    def dtype(self):
+        """data type of the image data"""
+        if self._dtype is None:
+            self._dtype = self[0].dtype
+        return self._dtype
+
+    @abc.abstractmethod
+    def _get_chunk_data(self, chunk_slice):
+        """Implemented in subclass to obtain actual data"""
+
     def _get_chunk_index_for_index(self, index):
         if index < 0:
             index = self._len + index
@@ -55,30 +71,19 @@ class HDF5ImageCache:
             raise IndexError(
                 f"Index {index} out of bounds for HDF5ImageCache "
                 f"of size {self._len}")
+        index = int(index)  # convert np.uint64 to int, so we get ints below
         chunk_index = index // self.chunk_size
         sub_index = index % self.chunk_size
         return chunk_index, sub_index
 
-    def __getitem__(self, index):
-        chunk_index, sub_index = self._get_chunk_index_for_index(index)
-        return self.get_chunk(chunk_index)[sub_index]
-
-    def __len__(self):
-        return self._len
-
     def get_chunk(self, chunk_index):
         """Return one chunk of images"""
         if chunk_index not in self.cache:
-            fslice = slice(self.chunk_size * chunk_index,
-                           self.chunk_size * (chunk_index + 1)
-                           )
-            data = self.h5ds[fslice]
-            if self.boolean:
-                data = np.array(data, dtype=bool)
-            self.cache[chunk_index] = data
-            if len(self.cache) > self.cache_size:
+            if len(self.cache) >= self.cache_size:
                 # Remove the first item
                 self.cache.popitem(last=False)
+            data = self._get_chunk_data(self.get_chunk_slice(chunk_index))
+            self.cache[chunk_index] = data
         return self.cache[chunk_index]
 
     def get_chunk_size(self, chunk_index):
@@ -91,60 +96,77 @@ class HDF5ImageCache:
             raise IndexError(f"{self} only has {self.num_chunks} chunks!")
         return chunk_size
 
+    def get_chunk_slice(self, chunk_index):
+        """Return the slice corresponding to the chunk index"""
+        ch_slice = slice(self.chunk_size * chunk_index,
+                         self.chunk_size * (chunk_index + 1)
+                         )
+        return ch_slice
+
     def iter_chunks(self):
-        size = self.h5ds.shape[0]
         index = 0
         chunk = 0
         while True:
             yield chunk
             chunk += 1
             index += self.chunk_size
-            if index >= size:
+            if index >= self._len:
                 break
 
 
-class ImageCorrCache:
+class HDF5ImageCache(BaseImageChunkCache):
     def __init__(self,
-                 image: HDF5ImageCache,
-                 image_bg: HDF5ImageCache):
-        self.image = image
-        self.image_bg = image_bg
-        self.chunk_size = image.chunk_size
-        self.num_chunks = image.num_chunks
-        self.h5ds = image.h5ds
-        self.shape = image.shape
-        self.chunk_shape = image.chunk_shape
-        #: This is a FILO cache for the corrected image chunks
-        self.cache = collections.OrderedDict()
-        self.cache_size = image.cache_size
+                 h5ds: h5py.Dataset,
+                 chunk_size: int = 1000,
+                 cache_size: int = 2,
+                 boolean: bool = False):
+        """An HDF5 image cache
 
-    def _get_chunk_index_for_index(self, index):
-        if index < 0:
-            index = len(self.h5ds) + index
-        chunk_index = index // self.chunk_size
-        sub_index = index % self.chunk_size
-        return chunk_index, sub_index
+        Deformability cytometry data files commonly contain image stacks
+        that are chunked in various ways. Loading just a single image
+        can be time-consuming, because an entire HDF5 chunk has to be
+        loaded, decompressed and from that one image extracted. The
+        `HDF5ImageCache` class caches the chunks from the HDF5 files
+        into memory, making single-image-access very fast.
+        """
+        super(HDF5ImageCache, self).__init__(
+            shape=h5ds.shape,
+            chunk_size=chunk_size,
+            cache_size=cache_size)
+        # TODO:
+        # - adjust chunking to multiples of the chunks in the dataset
+        #   (which might slightly speed up things)
+        self.h5ds = h5ds
+        self.boolean = boolean
 
-    def __getitem__(self, index):
-        chunk_index, sub_index = self._get_chunk_index_for_index(index)
-        return self.get_chunk(chunk_index)[sub_index]
+        if self._len == 0:
+            warnings.warn(f"Input image '{h5ds.name}' in "
+                          f"file {h5ds.file.filename} has zero length",
+                          EmptyDatasetWarning)
 
-    def __len__(self):
-        return len(self.image)
+    def _get_chunk_data(self, chunk_slice):
+        data = self.h5ds[chunk_slice]
+        if self.boolean:
+            data = np.array(data, dtype=bool)
+        return data
 
-    def get_chunk(self, chunk_index):
-        if chunk_index not in self.cache:
-            data = np.array(
-                self.image.get_chunk(chunk_index), dtype=np.int16) \
-                - self.image_bg.get_chunk(chunk_index)
-            self.cache[chunk_index] = data
-            if len(self.cache) > self.cache_size:
-                # Remove the first item
-                self.cache.popitem(last=False)
-        return self.cache[chunk_index]
 
-    def iter_chunks(self):
-        return self.image.iter_chunks()
+class ImageCorrCache(BaseImageChunkCache):
+    def __init__(self,
+                 image: HDF5ImageCache,
+                 image_bg: HDF5ImageCache):
+        super(ImageCorrCache, self).__init__(
+            shape=image.shape,
+            chunk_size=image.chunk_size,
+            cache_size=image.cache_size)
+        self.image = image
+        self.image_bg = image_bg
+
+    def _get_chunk_data(self, chunk_slice):
+        data = np.array(
+            self.image._get_chunk_data(chunk_slice), dtype=np.int16) \
+            - self.image_bg._get_chunk_data(chunk_slice)
+        return data
 
 
 @functools.cache
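
The net effect of this refactor is that chunk management (the small chunk cache, fancy indexing, and chunk slicing) now lives in BaseImageChunkCache, while HDF5ImageCache and ImageCorrCache only implement _get_chunk_data(). A minimal usage sketch; the file name and dataset paths are illustrative and not taken from this diff:

import h5py
from dcnum.read.cache import HDF5ImageCache, ImageCorrCache

h5 = h5py.File("measurement.rtdc", "r")  # hypothetical input file

# Chunks of 1000 frames are loaded lazily; at most two chunks are kept
# in the cache at any time (chunk_size=1000, cache_size=2 defaults).
image = HDF5ImageCache(h5["events/image"])
image_bg = HDF5ImageCache(h5["events/image_bg"])

# Background-corrected frames are computed chunk-wise on first access.
image_corr = ImageCorrCache(image, image_bg)
frame0 = image_corr[0]          # single int16, background-subtracted frame
frames = image_corr[[0, 1, 2]]  # list indexing via BaseImageChunkCache.__getitem__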
dcnum/read/const.py CHANGED
@@ -1,15 +1,17 @@
-#: Scalar features that apply to all events in a frame
+#: Scalar features that apply to all events in a frame and which are
+#: not computed for individual events.
 PROTECTED_FEATURES = [
-    "bg_med",
+    "bg_off",
     "flow_rate",
     "frame",
     "g_force",
-    "index_online",
     "pressure",
     "temp",
     "temp_amb",
-    "time"
+    "time",
 ]
 
+# User-defined features may be anything, but if the user needs something
+# very specific for the pipeline, having them protected is a nice feature.
 for ii in range(10):
     PROTECTED_FEATURES.append(f"userdef{ii}")
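
After the loop shown above has run at import time, PROTECTED_FEATURES also contains userdef0 through userdef9. A hypothetical check (the helper is_protected is not part of dcnum):

from dcnum.read.const import PROTECTED_FEATURES

def is_protected(feat: str) -> bool:
    # Scalar per-frame features that are not computed for individual events.
    return feat in PROTECTED_FEATURES

assert is_protected("bg_off")       # newly protected in this release
assert is_protected("userdef3")     # user-defined features are protected
assert not is_protected("bg_med")   # no longer in the list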
dcnum/read/hdf5_data.py CHANGED
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
+import hashlib
 import io
 import json
+import numbers
 import pathlib
 import tempfile
 from typing import Dict, BinaryIO, List
@@ -13,6 +15,7 @@ import numpy as np
 
 from .cache import HDF5ImageCache, ImageCorrCache, md5sum
 from .const import PROTECTED_FEATURES
+from .mapped import get_mapped_object, get_mapping_indices
 
 
 class HDF5Data:
@@ -26,12 +29,47 @@ class HDF5Data:
                  logs: Dict[List[str]] = None,
                  tables: Dict[np.ndarray] = None,
                  image_cache_size: int = 2,
+                 index_mapping: int | slice | List | np.ndarray = None,
                  ):
+        """
+
+        Parameters
+        ----------
+        path:
+            path to data file
+        pixel_size:
+            pixel size in µm
+        md5_5m:
+            MD5 sum of the first 5 MiB; computed if not provided
+        meta:
+            metadata dictionary; extracted from HDF5 attributes
+            if not provided
+        basins:
+            list of basin dictionaries; extracted from HDF5 attributes
+            if not provided
+        logs:
+            dictionary of logs; extracted from HDF5 attributes
+            if not provided
+        tables:
+            dictionary of tables; extracted from HDF5 attributes
+            if not provided
+        image_cache_size:
+            size of the image cache to use when accessing image data
+        index_mapping:
+            select only a subset of input events, transparently reducing the
+            size of the dataset, possible data types are
+            - int `N`: use the first `N` events
+            - slice: use the events defined by a slice
+            - list: list of integers specifying the event indices to use
+            Numpy indexing rules apply. E.g. to only process the first
+            100 events, set this to `100` or `slice(0, 100)`.
+        """
         # Init is in __setstate__ so we can pickle this class
         # and use it for multiprocessing.
         if isinstance(path, h5py.File):
            self.h5 = path
            path = path.filename
+
         self.__setstate__({"path": path,
                            "pixel_size": pixel_size,
                            "md5_5m": md5_5m,
@@ -40,6 +78,7 @@ class HDF5Data:
                            "logs": logs,
                            "tables": tables,
                            "image_cache_size": image_cache_size,
+                           "index_mapping": index_mapping,
                            })
 
     def __contains__(self, item):
@@ -53,7 +92,7 @@ class HDF5Data:
 
     def __getitem__(self, feat):
         if feat in ["image", "image_bg", "mask"]:
-            data = self.get_image_cache(feat)
+            data = self.get_image_cache(feat)  # already index-mapped
             if data is None:
                 raise KeyError(f"Feature '{feat}' not found in {self}!")
         else:
@@ -62,19 +101,25 @@ class HDF5Data:
                 return self._cache_scalar[feat]
             elif (feat in self.h5["events"]
                     and len(self.h5["events"][feat].shape) == 1):  # cache scalar
-                self._cache_scalar[feat] = self.h5["events"][feat][:]
+                if self.index_mapping is None:
+                    idx_map = slice(None)  # no mapping indices, just slice
+                else:
+                    idx_map = get_mapping_indices(self.index_mapping)
+                self._cache_scalar[feat] = self.h5["events"][feat][idx_map]
                 return self._cache_scalar[feat]
             else:
                 if feat in self.h5["events"]:
                     # Not cached (possibly slow)
                     warnings.warn(f"Feature {feat} not cached (possibly slow)")
-                    return self.h5["events"][feat]
+                    return get_mapped_object(
+                        obj=self.h5["events"][feat],
+                        index_mapping=self.index_mapping)
                 else:
                     # Check the basins
                     for idx in range(len(self.basins)):
                         bn, bn_features = self.get_basin_data(idx)
                         if bn_features and feat in bn_features:
-                            return bn[feat]
+                            return bn[feat]  # already index-mapped
                     # If we got here, then the feature data does not exist.
                     raise KeyError(f"Feature '{feat}' not found in {self}!")
 
@@ -86,13 +131,14 @@ class HDF5Data:
                 "logs": self.logs,
                 "tables": self.tables,
                 "basins": self.basins,
-                "image_cache_size": self.image.cache_size
+                "image_cache_size": self.image.cache_size,
+                "index_mapping": self.index_mapping,
                 }
 
     def __setstate__(self, state):
         # Make sure these properties exist (we rely on __init__, because
         # we want this class to be pickable and __init__ is not called by
-        # `pickle.load`.
+        # `pickle.load`).
         # Cached properties
         self._feats = None
         self._keys = None
@@ -116,7 +162,7 @@ class HDF5Data:
         if self.md5_5m is None:
             if isinstance(self.path, pathlib.Path):
                 # 5MB md5sum of input file
-                self.md5_5m = md5sum(self.path, count=80)
+                self.md5_5m = md5sum(self.path, blocksize=65536, count=80)
             else:
                 self.md5_5m = str(uuid.uuid4()).replace("-", "")
         self.meta = state["meta"]
@@ -140,37 +186,44 @@ class HDF5Data:
            if isinstance(self.meta[key], bytes):
                self.meta[key] = self.meta[key].decode("utf-8")
         # logs
-        for key in h5.get("logs", []):
+        for key in sorted(h5.get("logs", {}).keys()):
             alog = list(h5["logs"][key])
             if alog:
                 if isinstance(alog[0], bytes):
                     alog = [ll.decode("utf") for ll in alog]
                 self.logs[key] = alog
         # tables
-        for tab in h5.get("tables", []):
+        for tab in sorted(h5.get("tables", {}).keys()):
             tabdict = {}
             for tkey in h5["tables"][tab].dtype.fields.keys():
                 tabdict[tkey] = \
                     np.array(h5["tables"][tab][tkey]).reshape(-1)
             self.tables[tab] = tabdict
         # basins
-        for bnkey in h5.get("basins", []):
+        basins = []
+        for bnkey in h5.get("basins", {}).keys():
             bn_data = "\n".join(
                 [s.decode() for s in h5["basins"][bnkey][:].tolist()])
             bn_dict = json.loads(bn_data)
-            self.basins.append(bn_dict)
+            basins.append(bn_dict)
+        self.basins = sorted(basins, key=lambda x: x["name"])
 
         if state["pixel_size"] is not None:
             self.pixel_size = state["pixel_size"]
 
         self.image_cache_size = state["image_cache_size"]
 
+        self.index_mapping = state["index_mapping"]
+
         if self.h5 is None:
             self.h5 = h5py.File(self.path, libver="latest")
 
     def __len__(self):
         if self._len is None:
-            self._len = self.h5.attrs["experiment:event count"]
+            if self.index_mapping is not None:
+                self._len = get_mapping_indices(self.index_mapping).size
+            else:
+                self._len = self.h5.attrs["experiment:event count"]
         return self._len
 
     @property
@@ -244,7 +297,9 @@ class HDF5Data:
         self.h5.close()
 
     def get_ppid(self):
-        return self.get_ppid_from_ppkw({"pixel_size": self.pixel_size})
+        return self.get_ppid_from_ppkw(
+            {"pixel_size": self.pixel_size,
+             "index_mapping": self.index_mapping})
 
     @classmethod
     def get_ppid_code(cls):
@@ -255,20 +310,64 @@ class HDF5Data:
         # Data does not really fit into the PPID scheme we use for the rest
         # of the pipeline. This implementation here is custom.
         code = cls.get_ppid_code()
-        kwid = f"p={kwargs['pixel_size']:.8f}".rstrip("0")
+        # pixel size
+        ppid_ps = f"{kwargs['pixel_size']:.8f}".rstrip("0")
+        # index mapping
+        ppid_im = cls.get_ppid_index_mapping(kwargs.get("index_mapping", None))
+        kwid = "^".join([f"p={ppid_ps}", f"i={ppid_im}"])
         return ":".join([code, kwid])
 
+    @staticmethod
+    def get_ppid_index_mapping(index_mapping):
+        """Return the pipeline identifier part for index mapping"""
+        im = index_mapping
+        if im is None:
+            dim = "0"
+        elif isinstance(im, numbers.Integral):
+            dim = f"{im}"
+        elif isinstance(im, slice):
+            dim = (f"{im.start if im.start is not None else 'n'}"
+                   + f"-{im.stop if im.stop is not None else 'n'}"
+                   + f"-{im.step if im.step is not None else 'n'}"
+                   )
+        elif isinstance(im, (list, np.ndarray)):
+            idhash = hashlib.md5(
+                np.array(im, dtype=np.uint32).tobytes()).hexdigest()
+            dim = f"h-{idhash[:8]}"
+        else:
+            dim = "unknown"
+        return dim
+
     @staticmethod
     def get_ppkw_from_ppid(dat_ppid):
         # Data does not fit in the PPID scheme we use, but we still
         # would like to pass pixel_size to __init__ if we need it.
-        code, pp_dat_kwargs = dat_ppid.split(":")
+        code, kwargs_str = dat_ppid.split(":")
         if code != HDF5Data.get_ppid_code():
             raise ValueError(f"Could not find data method '{code}'!")
-        p, val = pp_dat_kwargs.split("=")
-        if p != "p":
-            raise ValueError(f"Invalid parameter '{p}'!")
-        return {"pixel_size": float(val)}
+        kwitems = kwargs_str.split("^")
+        kwargs = {}
+        for item in kwitems:
+            var, val = item.split("=")
+            if var == "p":
+                kwargs["pixel_size"] = float(val)
+            elif var == "i":
+                if val.startswith("h-") or val == "unknown":
+                    raise ValueError(f"Cannot invert index mapping {val}")
+                elif val == "0":
+                    kwargs["index_mapping"] = None
+                elif val.count("-"):
+                    start, stop, step = val.split("-")
+                    kwargs["index_mapping"] = slice(
+                        None if start == "n" else int(start),
+                        None if stop == "n" else int(stop),
+                        None if step == "n" else int(step)
+                    )
+                else:
+                    kwargs["index_mapping"] = int(val)
+            else:
+                raise ValueError(f"Invalid parameter '{var}'!")
+        return kwargs
 
     def get_basin_data(self, index):
         """Return HDF5Data info for a basin index in `self.basins`
@@ -298,7 +397,22 @@ class HDF5Data:
             if path is None:
                 self._basin_data[index] = (None, None)
             else:
-                h5dat = HDF5Data(path)
+                feat_basinmap = bn_dict.get("mapping", None)
+                if feat_basinmap is None:
+                    # This is NOT a mapped basin.
+                    index_mapping = self.index_mapping
+                else:
+                    # This is a mapped basin. Create an indexing list.
+                    if self.index_mapping is None:
+                        # The current dataset is not mapped.
+                        basinmap_idx = slice(None)
+                    else:
+                        # The current dataset is also mapped.
+                        basinmap_idx = get_mapping_indices(self.index_mapping)
+                    basinmap = self.h5[f"events/{feat_basinmap}"]
+                    index_mapping = basinmap[basinmap_idx]
+
+                h5dat = HDF5Data(path, index_mapping=index_mapping)
                 features = bn_dict.get("features")
                 if features is None:
                     # Only get the features from the actual HDF5 file.
@@ -323,20 +437,27 @@ class HDF5Data:
         if feat not in self._image_cache:
             if f"events/{feat}" in self.h5:
                 ds = self.h5[f"events/{feat}"]
+                idx_map = self.index_mapping
             else:
+                idx_map = None
                 # search all basins
                 for idx in range(len(self.basins)):
-                    bndat, features = self.get_basin_data(idx)
+                    bn_dat, features = self.get_basin_data(idx)
                     if features is not None:
                         if feat in features:
-                            ds = bndat.h5[f"events/{feat}"]
+                            # HDF5 dataset
+                            ds = bn_dat.h5[f"events/{feat}"]
+                            # Index mapping (taken from the basins which
+                            # already includes the mapping from the current
+                            # instance).
+                            idx_map = bn_dat.index_mapping
                             break
                 else:
                     ds = None
 
             if ds is not None:
                 image = HDF5ImageCache(
-                    h5ds=ds,
+                    h5ds=get_mapped_object(obj=ds, index_mapping=idx_map),
                     cache_size=self.image_cache_size,
                     boolean=feat == "mask")
             else:
@@ -386,6 +507,7 @@ def concatenated_hdf5_data(paths: List[pathlib.Path],
     - If one of the input files does not contain a feature from the first
       input `paths`, then a `ValueError` is raised. Use the `features`
      argument to specify which features you need instead.
+    - Basins are not considered.
     """
     h5kwargs = {"mode": "w", "libver": "latest"}
     if isinstance(path_out, (pathlib.Path, str)):
@@ -432,6 +554,7 @@ def concatenated_hdf5_data(paths: List[pathlib.Path],
             if not isinstance(h5["events"][feat], h5py.Dataset):
                 warnings.warn(
                     f"Ignoring {feat}; not implemented yet!")
+                continue
             if feat in ["frame", "time"]:
                 continue
             shapes.setdefault(feat, []).append(
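
Taken together, image access now routes through get_mapped_object() before the chunk cache, so a mapped HDF5Data instance stays consistent across scalar features, image stacks and basins. A final sketch under the same assumptions as above (paths and feature names are placeholders):

import pathlib
from dcnum.read import HDF5Data

data = HDF5Data(pathlib.Path("measurement.rtdc"),
                index_mapping=[2, 3, 10, 42])

# Index 0 of the mapped dataset corresponds to event 2 in the file;
# the image cache wraps the mapped dataset, not the raw one.
image0 = data["image"][0]

# Scalar features stay aligned with the image stack.
assert len(data["frame"]) == len(data) == 4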