dcnum 0.17.2__py3-none-any.whl → 0.18.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dcnum might be problematic.

dcnum/logic/ctrl.py CHANGED
@@ -1,9 +1,11 @@
 import collections
 import datetime
+import hashlib
 import json
 import logging
 from logging.handlers import QueueListener
 import multiprocessing as mp
+import numbers
 import os
 import pathlib
 import platform
@@ -14,6 +16,7 @@ import traceback
 import uuid
 
 import h5py
+import numpy as np
 
 from ..feat.feat_background.base import get_available_background_methods
 from ..feat.queue_event_extractor import QueueEventExtractor
@@ -382,6 +385,24 @@ class DCNumJobRunner(threading.Thread):
         hw.h5.attrs["pipeline:dcnum gate"] = self.ppdict["gate_id"]
         hw.h5.attrs["pipeline:dcnum hash"] = self.pphash
         hw.h5.attrs["pipeline:dcnum yield"] = self.event_count
+        # index mapping information
+        im = self.job.kwargs["data_kwargs"].get("index_mapping", None)
+        if im is None:
+            dim = "0"
+        elif isinstance(im, numbers.Number):
+            dim = f"{im}"
+        elif isinstance(im, slice):
+            dim = (f"{im.start if im.start is not None else 'n'}"
+                   + f"-{im.stop if im.stop is not None else 'n'}"
+                   + f"-{im.step if im.step is not None else 'n'}"
+                   )
+        elif isinstance(im, (list, np.ndarray)):
+            idhash = hashlib.md5(
+                np.array(im, dtype=np.uint32).tobytes()).hexdigest()
+            dim = f"h-{idhash[:8]}"
+        else:
+            dim = "unknown"
+        hw.h5.attrs["pipeline:dcnum mapping"] = dim
         # regular metadata
         hw.h5.attrs["experiment:event count"] = self.event_count
         hw.h5.attrs["imaging:pixel size"] = self.draw.pixel_size
@@ -503,7 +524,7 @@ class DCNumJobRunner(threading.Thread):
             num_segmenters = 1
         elif seg_cls.hardware_processor == "cpu":  # CPU segmenter
             # We could in principle set the number of slots to one and
-            # jave both number of extractors and number of segmenters set
+            # have both number of extractors and number of segmenters set
             # to the total number of CPUs. However, we would need more RAM
             # (for caching the image data) and we also have more overhead.
             # Having two slots shared between all workers is more efficient.
@@ -522,10 +543,11 @@ class DCNumJobRunner(threading.Thread):
         slot_chunks = mp_spawn.Array("i", num_slots)
         slot_states = mp_spawn.Array("u", num_slots)
 
-        # Initialize thread
+        # Initialize segmenter manager thread
         thr_segm = SegmenterManagerThread(
             segmenter=seg_cls(**self.job["segmenter_kwargs"]),
             image_data=imdat,
+            bg_off=self.dtin["bg_off"] if "bg_off" in self.dtin else None,
             slot_states=slot_states,
             slot_chunks=slot_chunks,
             debug=self.job["debug"],
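The new block above condenses the `index_mapping` keyword into the short `pipeline:dcnum mapping` attribute string. As a rough, self-contained sketch of that branching (the helper name `mapping_id` is made up for illustration; dcnum writes the string inline as shown in the diff):

    import hashlib
    import numbers

    import numpy as np

    def mapping_id(im):
        # "0" means no mapping; numbers are written verbatim; slices
        # become "start-stop-step" with "n" for None; lists/arrays are
        # condensed to a short md5 digest of their uint32 representation.
        if im is None:
            return "0"
        elif isinstance(im, numbers.Number):
            return f"{im}"
        elif isinstance(im, slice):
            return (f"{im.start if im.start is not None else 'n'}"
                    f"-{im.stop if im.stop is not None else 'n'}"
                    f"-{im.step if im.step is not None else 'n'}")
        elif isinstance(im, (list, np.ndarray)):
            idhash = hashlib.md5(
                np.array(im, dtype=np.uint32).tobytes()).hexdigest()
            return f"h-{idhash[:8]}"
        return "unknown"

    print(mapping_id(None))           # "0"
    print(mapping_id(100))            # "100"
    print(mapping_id(slice(0, 100)))  # "0-100-n"
    print(mapping_id([1, 5, 7]))      # "h-" plus first 8 digest chars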
@@ -13,5 +13,7 @@ class ExtendedJSONEncoder(json.JSONEncoder):
             return int(obj)
         elif isinstance(obj, np.bool_):
             return bool(obj)
+        elif isinstance(obj, slice):
+            return "PYTHON-SLICE", (obj.start, obj.stop, obj.step)
         # Let the base class default method raise the TypeError
         return json.JSONEncoder.default(self, obj)
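With the added branch, `ExtendedJSONEncoder` can serialize `slice` objects, which matters now that `index_mapping` (possibly a slice) ends up in the serialized job kwargs. A minimal round-trip sketch; the encoder's module path is not shown in this diff, so the import below is an assumption:

    import json

    from dcnum.logic.json_encoder import ExtendedJSONEncoder  # path assumed

    s = json.dumps({"idx": slice(0, 100)}, cls=ExtendedJSONEncoder)
    # slices serialize as a tagged tuple:
    # {"idx": ["PYTHON-SLICE", [0, 100, null]]}
    print(s)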
dcnum/meta/ppid.py CHANGED
@@ -10,7 +10,7 @@ import warnings
 
 #: Increment this string if there are breaking changes that make
 #: previous pipelines unreproducible.
-DCNUM_PPID_GENERATION = "7"
+DCNUM_PPID_GENERATION = "8"
 
 
 class ClassWithPPIDCapabilities(Protocol):
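The generation string prefixes every pipeline identifier, so the bump from "7" to "8" marks pipelines recorded with earlier dcnum versions as unreproducible with 0.18.0. A quick check against the new value:

    from dcnum.meta.ppid import DCNUM_PPID_GENERATION

    # Pipeline identifiers are prefixed with the generation string.
    assert DCNUM_PPID_GENERATION == "8"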
dcnum/read/__init__.py CHANGED
@@ -2,3 +2,4 @@
 from .cache import md5sum
 from .const import PROTECTED_FEATURES
 from .hdf5_data import HDF5Data, HDF5ImageCache, concatenated_hdf5_data
+from .mapped import get_mapping_indices, get_mapped_object
dcnum/read/cache.py CHANGED
@@ -1,7 +1,9 @@
+import abc
 import collections
 import functools
 import hashlib
 import pathlib
+from typing import Tuple
 import warnings
 
 import h5py
@@ -13,41 +15,34 @@ class EmptyDatasetWarning(UserWarning):
     pass
 
 
-class HDF5ImageCache:
+class BaseImageChunkCache(abc.ABC):
     def __init__(self,
-                 h5ds: h5py.Dataset,
+                 shape: Tuple[int],
                  chunk_size: int = 1000,
                  cache_size: int = 2,
-                 boolean: bool = False):
-        """An HDF5 image cache
-
-        Deformability cytometry data files commonly contain image stacks
-        that are chunked in various ways. Loading just a single image
-        can be time-consuming, because an entire HDF5 chunk has to be
-        loaded, decompressed and from that one image extracted. The
-        `HDF5ImageCache` class caches the chunks from the HDF5 files
-        into memory, making single-image-access very fast.
-        """
-        self.shape = h5ds.shape
+                 ):
+        self.shape = shape
+        chunk_size = min(shape[0], chunk_size)
         self._len = self.shape[0]
-        if self._len == 0:
-            warnings.warn(f"Input image '{h5ds.name}' in "
-                          f"file {h5ds.file.filename} has zero length",
-                          EmptyDatasetWarning)
-        # TODO:
-        # - adjust chunking to multiples of the chunks in the dataset
-        #   (which might slightly speed up things)
-        chunk_size = min(h5ds.shape[0], chunk_size)
-        self.h5ds = h5ds
-        self.chunk_size = chunk_size
-        self.boolean = boolean
-        self.cache_size = cache_size
         #: This is a FILO cache for the chunks
         self.cache = collections.OrderedDict()
         self.image_shape = self.shape[1:]
         self.chunk_shape = (chunk_size,) + self.shape[1:]
+        self.chunk_size = chunk_size
+        self.cache_size = cache_size
         self.num_chunks = int(np.ceil(self._len / (self.chunk_size or 1)))
 
+    def __getitem__(self, index):
+        chunk_index, sub_index = self._get_chunk_index_for_index(index)
+        return self.get_chunk(chunk_index)[sub_index]
+
+    def __len__(self):
+        return self._len
+
+    @abc.abstractmethod
+    def _get_chunk_data(self, chunk_slice):
+        """Implemented in subclass to obtain actual data"""
+
     def _get_chunk_index_for_index(self, index):
         if index < 0:
             index = self._len + index
@@ -59,26 +54,14 @@ class HDF5ImageCache:
         sub_index = index % self.chunk_size
         return chunk_index, sub_index
 
-    def __getitem__(self, index):
-        chunk_index, sub_index = self._get_chunk_index_for_index(index)
-        return self.get_chunk(chunk_index)[sub_index]
-
-    def __len__(self):
-        return self._len
-
     def get_chunk(self, chunk_index):
         """Return one chunk of images"""
         if chunk_index not in self.cache:
-            fslice = slice(self.chunk_size * chunk_index,
-                           self.chunk_size * (chunk_index + 1)
-                           )
-            data = self.h5ds[fslice]
-            if self.boolean:
-                data = np.array(data, dtype=bool)
-            self.cache[chunk_index] = data
-            if len(self.cache) > self.cache_size:
+            if len(self.cache) >= self.cache_size:
                 # Remove the first item
                 self.cache.popitem(last=False)
+            data = self._get_chunk_data(self.get_chunk_slice(chunk_index))
+            self.cache[chunk_index] = data
         return self.cache[chunk_index]
 
     def get_chunk_size(self, chunk_index):
@@ -91,60 +74,77 @@ class HDF5ImageCache:
            raise IndexError(f"{self} only has {self.num_chunks} chunks!")
        return chunk_size
 
+    def get_chunk_slice(self, chunk_index):
+        """Return the slice corresponding to the chunk index"""
+        ch_slice = slice(self.chunk_size * chunk_index,
+                         self.chunk_size * (chunk_index + 1)
+                         )
+        return ch_slice
+
     def iter_chunks(self):
-        size = self.h5ds.shape[0]
         index = 0
         chunk = 0
         while True:
             yield chunk
             chunk += 1
             index += self.chunk_size
-            if index >= size:
+            if index >= self._len:
                 break
 
 
-class ImageCorrCache:
+class HDF5ImageCache(BaseImageChunkCache):
     def __init__(self,
-                 image: HDF5ImageCache,
-                 image_bg: HDF5ImageCache):
-        self.image = image
-        self.image_bg = image_bg
-        self.chunk_size = image.chunk_size
-        self.num_chunks = image.num_chunks
-        self.h5ds = image.h5ds
-        self.shape = image.shape
-        self.chunk_shape = image.chunk_shape
-        #: This is a FILO cache for the corrected image chunks
-        self.cache = collections.OrderedDict()
-        self.cache_size = image.cache_size
+                 h5ds: h5py.Dataset,
+                 chunk_size: int = 1000,
+                 cache_size: int = 2,
+                 boolean: bool = False):
+        """An HDF5 image cache
 
-    def _get_chunk_index_for_index(self, index):
-        if index < 0:
-            index = len(self.h5ds) + index
-        chunk_index = index // self.chunk_size
-        sub_index = index % self.chunk_size
-        return chunk_index, sub_index
+        Deformability cytometry data files commonly contain image stacks
+        that are chunked in various ways. Loading just a single image
+        can be time-consuming, because an entire HDF5 chunk has to be
+        loaded, decompressed and from that one image extracted. The
+        `HDF5ImageCache` class caches the chunks from the HDF5 files
+        into memory, making single-image-access very fast.
+        """
+        super(HDF5ImageCache, self).__init__(
+            shape=h5ds.shape,
+            chunk_size=chunk_size,
+            cache_size=cache_size)
+        # TODO:
+        # - adjust chunking to multiples of the chunks in the dataset
+        #   (which might slightly speed up things)
+        self.h5ds = h5ds
+        self.boolean = boolean
 
-    def __getitem__(self, index):
-        chunk_index, sub_index = self._get_chunk_index_for_index(index)
-        return self.get_chunk(chunk_index)[sub_index]
+        if self._len == 0:
+            warnings.warn(f"Input image '{h5ds.name}' in "
+                          f"file {h5ds.file.filename} has zero length",
+                          EmptyDatasetWarning)
 
-    def __len__(self):
-        return len(self.image)
+    def _get_chunk_data(self, chunk_slice):
+        data = self.h5ds[chunk_slice]
+        if self.boolean:
+            data = np.array(data, dtype=bool)
+        return data
 
-    def get_chunk(self, chunk_index):
-        if chunk_index not in self.cache:
-            data = np.array(
-                self.image.get_chunk(chunk_index), dtype=np.int16) \
-                - self.image_bg.get_chunk(chunk_index)
-            self.cache[chunk_index] = data
-            if len(self.cache) > self.cache_size:
-                # Remove the first item
-                self.cache.popitem(last=False)
-        return self.cache[chunk_index]
 
-    def iter_chunks(self):
-        return self.image.iter_chunks()
+class ImageCorrCache(BaseImageChunkCache):
+    def __init__(self,
+                 image: HDF5ImageCache,
+                 image_bg: HDF5ImageCache):
+        super(ImageCorrCache, self).__init__(
+            shape=image.shape,
+            chunk_size=image.chunk_size,
+            cache_size=image.cache_size)
+        self.image = image
+        self.image_bg = image_bg
+
+    def _get_chunk_data(self, chunk_slice):
+        data = np.array(
            self.image._get_chunk_data(chunk_slice), dtype=np.int16) \
            - self.image_bg._get_chunk_data(chunk_slice)
+        return data
 
 
 @functools.cache
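The refactoring extracts chunk arithmetic and cache eviction into the abstract `BaseImageChunkCache`, so subclasses only implement `_get_chunk_data`. It also fixes a subtle eviction detail: the old code inserted the new chunk before checking the cache size (briefly holding `cache_size + 1` chunks), while the new code evicts first. A toy subclass illustrating the template-method pattern, serving chunks from an in-memory array instead of HDF5 (illustration only, assuming the class layout from this diff):

    import numpy as np

    from dcnum.read.cache import BaseImageChunkCache


    class ArrayChunkCache(BaseImageChunkCache):
        # Serve chunks from an in-memory array (toy example); the base
        # class handles chunk slicing, caching and eviction.
        def __init__(self, arr, chunk_size=1000, cache_size=2):
            super().__init__(shape=arr.shape,
                             chunk_size=chunk_size,
                             cache_size=cache_size)
            self.arr = arr

        def _get_chunk_data(self, chunk_slice):
            return self.arr[chunk_slice]


    data = np.arange(24).reshape(6, 2, 2)
    cache = ArrayChunkCache(data, chunk_size=2, cache_size=2)
    assert np.all(cache[3] == data[3])  # single-image access via chunks
    assert cache.num_chunks == 3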
dcnum/read/const.py CHANGED
@@ -1,6 +1,7 @@
 #: Scalar features that apply to all events in a frame and which are
-#: not computed from image or image_bg data.
+#: not computed for individual events.
 PROTECTED_FEATURES = [
+    "bg_off",
     "flow_rate",
     "frame",
     "g_force",
@@ -10,5 +11,7 @@ PROTECTED_FEATURES = [
     "time"
 ]
 
+# User-defined features may be anything, but if the user needs something
+# very specific for the pipeline, having them protected is a nice feature.
 for ii in range(10):
     PROTECTED_FEATURES.append(f"userdef{ii}")
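Since `bg_off` applies frame-wise (see the `bg_off` wiring in `ctrl.py` above), it joins the features that the pipeline must not recompute per event. A quick check, assuming the installed 0.18.0 package:

    from dcnum.read import PROTECTED_FEATURES

    assert "bg_off" in PROTECTED_FEATURES
    # the ten user-defined slots remain protected as well:
    assert "userdef0" in PROTECTED_FEATURES
    assert "userdef9" in PROTECTED_FEATURES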
dcnum/read/hdf5_data.py CHANGED
@@ -13,6 +13,7 @@ import numpy as np
 
 from .cache import HDF5ImageCache, ImageCorrCache, md5sum
 from .const import PROTECTED_FEATURES
+from .mapped import get_mapped_object, get_mapping_indices
 
 
 class HDF5Data:
@@ -26,12 +27,47 @@ class HDF5Data:
                  logs: Dict[List[str]] = None,
                  tables: Dict[np.ndarray] = None,
                  image_cache_size: int = 2,
+                 index_mapping: int | slice | List | np.ndarray = None,
                  ):
+        """
+
+        Parameters
+        ----------
+        path:
+            path to data file
+        pixel_size:
+            pixel size in µm
+        md5_5m:
+            MD5 sum of the first 5 MiB; computed if not provided
+        meta:
+            metadata dictionary; extracted from HDF5 attributes
+            if not provided
+        basins:
+            list of basin dictionaries; extracted from HDF5 attributes
+            if not provided
+        logs:
+            dictionary of logs; extracted from HDF5 attributes
+            if not provided
+        tables:
+            dictionary of tables; extracted from HDF5 attributes
+            if not provided
+        image_cache_size:
+            size of the image cache to use when accessing image data
+        index_mapping:
+            select only a subset of input events, transparently reducing the
+            size of the dataset, possible data types are
+            - int `N`: use the first `N` events
+            - slice: use the events defined by a slice
+            - list: list of integers specifying the event indices to use
+            Numpy indexing rules apply. E.g. to only process the first
+            100 events, set this to `100` or `slice(0, 100)`.
+        """
         # Init is in __setstate__ so we can pickle this class
         # and use it for multiprocessing.
         if isinstance(path, h5py.File):
             self.h5 = path
             path = path.filename
+
         self.__setstate__({"path": path,
                            "pixel_size": pixel_size,
                            "md5_5m": md5_5m,
@@ -40,6 +76,7 @@ class HDF5Data:
                            "logs": logs,
                            "tables": tables,
                            "image_cache_size": image_cache_size,
+                           "index_mapping": index_mapping,
                            })
 
     def __contains__(self, item):
@@ -53,7 +90,7 @@ class HDF5Data:
 
     def __getitem__(self, feat):
         if feat in ["image", "image_bg", "mask"]:
-            data = self.get_image_cache(feat)
+            data = self.get_image_cache(feat)  # already index-mapped
             if data is None:
                 raise KeyError(f"Feature '{feat}' not found in {self}!")
         else:
@@ -62,19 +99,25 @@ class HDF5Data:
             return self._cache_scalar[feat]
         elif (feat in self.h5["events"]
                 and len(self.h5["events"][feat].shape) == 1):  # cache scalar
-            self._cache_scalar[feat] = self.h5["events"][feat][:]
+            if self.index_mapping is None:
+                idx_map = slice(None)  # no mapping indices, just slice
+            else:
+                idx_map = get_mapping_indices(self.index_mapping)
+            self._cache_scalar[feat] = self.h5["events"][feat][idx_map]
             return self._cache_scalar[feat]
         else:
             if feat in self.h5["events"]:
                 # Not cached (possibly slow)
                 warnings.warn(f"Feature {feat} not cached (possibly slow)")
-                return self.h5["events"][feat]
+                return get_mapped_object(
+                    obj=self.h5["events"][feat],
+                    index_mapping=self.index_mapping)
             else:
                 # Check the basins
                 for idx in range(len(self.basins)):
                     bn, bn_features = self.get_basin_data(idx)
                     if bn_features and feat in bn_features:
-                        return bn[feat]
+                        return bn[feat]  # already index-mapped
                 # If we got here, then the feature data does not exist.
                 raise KeyError(f"Feature '{feat}' not found in {self}!")
 
@@ -86,13 +129,14 @@ class HDF5Data:
             "logs": self.logs,
             "tables": self.tables,
             "basins": self.basins,
-            "image_cache_size": self.image.cache_size
+            "image_cache_size": self.image.cache_size,
+            "index_mapping": self.index_mapping,
         }
 
     def __setstate__(self, state):
         # Make sure these properties exist (we rely on __init__, because
         # we want this class to be pickable and __init__ is not called by
-        # `pickle.load`.
+        # `pickle.load`).
         # Cached properties
         self._feats = None
         self._keys = None
@@ -116,7 +160,7 @@ class HDF5Data:
         if self.md5_5m is None:
             if isinstance(self.path, pathlib.Path):
                 # 5MB md5sum of input file
-                self.md5_5m = md5sum(self.path, count=80)
+                self.md5_5m = md5sum(self.path, blocksize=65536, count=80)
             else:
                 self.md5_5m = str(uuid.uuid4()).replace("-", "")
         self.meta = state["meta"]
@@ -165,12 +209,17 @@ class HDF5Data:
 
         self.image_cache_size = state["image_cache_size"]
 
+        self.index_mapping = state["index_mapping"]
+
         if self.h5 is None:
             self.h5 = h5py.File(self.path, libver="latest")
 
     def __len__(self):
         if self._len is None:
-            self._len = self.h5.attrs["experiment:event count"]
+            if self.index_mapping is not None:
+                self._len = get_mapping_indices(self.index_mapping).size
+            else:
+                self._len = self.h5.attrs["experiment:event count"]
         return self._len
 
     @property
@@ -255,20 +304,26 @@ class HDF5Data:
         # Data does not really fit into the PPID scheme we use for the rest
        # of the pipeline. This implementation here is custom.
         code = cls.get_ppid_code()
-        kwid = f"p={kwargs['pixel_size']:.8f}".rstrip("0")
+        ppid_ps = f"{kwargs['pixel_size']:.8f}".rstrip("0")
+        kwid = "^".join([f"p={ppid_ps}"])
         return ":".join([code, kwid])
 
     @staticmethod
     def get_ppkw_from_ppid(dat_ppid):
         # Data does not fit in the PPID scheme we use, but we still
         # would like to pass pixel_size to __init__ if we need it.
-        code, pp_dat_kwargs = dat_ppid.split(":")
+        code, kwargs_str = dat_ppid.split(":")
         if code != HDF5Data.get_ppid_code():
             raise ValueError(f"Could not find data method '{code}'!")
-        p, val = pp_dat_kwargs.split("=")
-        if p != "p":
-            raise ValueError(f"Invalid parameter '{p}'!")
-        return {"pixel_size": float(val)}
+        kwitems = kwargs_str.split("^")
+        kwargs = {}
+        for item in kwitems:
+            var, val = item.split("=")
+            if var == "p":
+                kwargs["pixel_size"] = float(val)
+            else:
+                raise ValueError(f"Invalid parameter '{var}'!")
+        return kwargs
 
     def get_basin_data(self, index):
         """Return HDF5Data info for a basin index in `self.basins`
@@ -298,7 +353,7 @@ class HDF5Data:
         if path is None:
             self._basin_data[index] = (None, None)
         else:
-            h5dat = HDF5Data(path)
+            h5dat = HDF5Data(path, index_mapping=self.index_mapping)
             features = bn_dict.get("features")
             if features is None:
                 # Only get the features from the actual HDF5 file.
@@ -336,7 +391,8 @@ class HDF5Data:
 
         if ds is not None:
             image = HDF5ImageCache(
-                h5ds=ds,
+                h5ds=get_mapped_object(obj=ds,
+                                       index_mapping=self.index_mapping),
                 cache_size=self.image_cache_size,
                 boolean=feat == "mask")
         else:
@@ -386,6 +442,7 @@ def concatenated_hdf5_data(paths: List[pathlib.Path],
     - If one of the input files does not contain a feature from the first
       input `paths`, then a `ValueError` is raised. Use the `features`
      argument to specify which features you need instead.
+    - Basins are not considered.
     """
    h5kwargs = {"mode": "w", "libver": "latest"}
    if isinstance(path_out, (pathlib.Path, str)):
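Taken together, these changes make `index_mapping` transparent to consumers: `__len__`, scalar feature access, the image caches, and basin lookups all see the reduced dataset. A short usage sketch (the file name and the `deform` feature are placeholders):

    from dcnum.read import HDF5Data

    # Only expose the first 100 events; equivalent to slice(0, 100).
    hd = HDF5Data("data.rtdc", index_mapping=100)
    assert len(hd) == 100        # __len__ honors the mapping
    deform = hd["deform"]        # scalar features are sliced on access

    # An explicit list of event indices works as well:
    hd_sub = HDF5Data("data.rtdc", index_mapping=[1, 5, 7])
    assert len(hd_sub) == 3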
dcnum/read/mapped.py ADDED
@@ -0,0 +1,79 @@
+import functools
+
+import numbers
+
+import h5py
+import numpy as np
+
+
+class MappedHDF5Dataset:
+    def __init__(self,
+                 h5ds: h5py.Dataset,
+                 mapping_indices: np.ndarray):
+        """An index-mapped object for accessing an HDF5 dataset
+
+        Parameters
+        ----------
+        h5ds: h5py.Dataset
+            HDF5 dataset from which to map data
+        mapping_indices: np.ndarray
+            numpy indexing array containing integer indices
+        """
+        self.h5ds = h5ds
+        self.mapping_indices = mapping_indices
+        self.shape = (mapping_indices.size,) + h5ds.shape[1:]
+
+    def __getitem__(self, idx):
+        if isinstance(idx, numbers.Integral):
+            return self.h5ds[self.mapping_indices[idx]]
+        else:
+            idx_mapped = self.mapping_indices[idx]
+            return self.h5ds[idx_mapped]
+
+
+def get_mapping_indices(
+        index_mapping: numbers.Integral | slice | list | np.ndarray
+        ):
+    if isinstance(index_mapping, numbers.Integral):
+        return _get_mapping_indices_cached(index_mapping)
+    elif isinstance(index_mapping, slice):
+        return _get_mapping_indices_cached(
+            (index_mapping.start, index_mapping.stop, index_mapping.step))
+    elif isinstance(index_mapping, (np.ndarray, list)):
+        return np.array(index_mapping, dtype=np.uint32)
+    else:
+        raise ValueError(f"Invalid type for `index_mapping`: "
+                         f"{type(index_mapping)} ({index_mapping})")
+
+
+@functools.lru_cache(maxsize=100)
+def _get_mapping_indices_cached(
+        index_mapping: numbers.Integral | tuple
+        ):
+    if isinstance(index_mapping, numbers.Integral):
+        return np.arange(index_mapping)
+    elif isinstance(index_mapping, tuple):
+        im_slice = slice(*index_mapping)
+        if im_slice.step is not None:
+            raise NotImplementedError("Slices with step not implemented yet")
+        if im_slice.stop is None or im_slice.start is None:
+            raise NotImplementedError(
+                "Slices must have start and stop defined")
+        return np.arange(im_slice.start, im_slice.stop)
+    elif isinstance(index_mapping, list):
+        return np.array(index_mapping, dtype=np.uint32)
+    else:
+        raise ValueError(f"Invalid type for cached `index_mapping`: "
+                         f"{type(index_mapping)} ({index_mapping})")
+
+
+def get_mapped_object(obj, index_mapping=None):
+    if index_mapping is None:
+        return obj
+    elif isinstance(obj, h5py.Dataset):
+        return MappedHDF5Dataset(
+            obj,
+            mapping_indices=get_mapping_indices(index_mapping))
+    else:
+        raise ValueError(f"No recipe to convert object of type {type(obj)} "
+                         f"({obj}) to an index-mapped object")
dcnum/segm/segm_thresh.py CHANGED
@@ -16,7 +16,7 @@ class SegmentThresh(CPUSegmenter):
     Parameters
     ----------
     thresh: int
-        grayscale threhold value for creating the mask image;
+        grayscale threshold value for creating the mask image;
         For a background-corrected image, pixels with values below
         this value are considered to be part of the mask.
     """
@@ -25,7 +25,7 @@ class SegmentThresh(CPUSegmenter):
     @staticmethod
     def segment_approach(image, *,
                          thresh: float = -6):
-        """Mask retrieval as it is done in Shape-In
+        """Mask retrieval using basic thresholding
 
         Parameters
         ----------
@@ -39,7 +39,7 @@ class SegmentThresh(CPUSegmenter):
         Returns
         -------
         mask: 2d boolean ndarray
-            Mask image for the give index
+            Mask image for the given index
         """
         assert thresh < 0, "threshold values above zero not supported!"
         return image < thresh
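The docstring fixes do not change behavior: segmentation remains a plain threshold on the background-corrected image. A tiny sanity-check sketch:

    import numpy as np

    from dcnum.segm.segm_thresh import SegmentThresh

    # Background-corrected toy image: one dark (negative) pixel on a
    # zero background; pixels *below* the threshold form the mask.
    image = np.zeros((5, 5), dtype=np.int16)
    image[2, 2] = -10
    mask = SegmentThresh.segment_approach(image, thresh=-6)
    assert mask[2, 2] and mask.sum() == 1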