cellfinder-1.6.0-py3-none-any.whl → cellfinder-1.8.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,7 @@
 import argparse
 
+from cellfinder.core import logger
+
 BRAINGLOBE_WORKFLOWS = "https://github.com/brainglobe/brainglobe-workflows"
 NEW_NAME = "brainmapper"
 BLOG_POST = "https://brainglobe.info/blog/version1/core_and_napari_merge.html"
@@ -36,7 +38,7 @@ def cli_catch() -> None:
         ),
     )
 
-    print(
+    logger.warning(
         "Hey, it looks like you're trying to run the old command-line tool.",
         "This workflow has been renamed and moved -",
         " you can now find it in the brainglobe-workflows package:\n",
@@ -19,8 +19,8 @@ def main(
     signal_array: types.array,
     background_array: types.array,
     n_free_cpus: int,
-    voxel_sizes: Tuple[int, int, int],
-    network_voxel_sizes: Tuple[int, int, int],
+    voxel_sizes: Tuple[float, float, float],
+    network_voxel_sizes: Tuple[float, float, float],
     batch_size: int,
     cube_height: int,
     cube_width: int,
@@ -29,12 +29,58 @@ def main(
     model_weights: Optional[os.PathLike],
     network_depth: depth_type,
     max_workers: int = 3,
+    pin_memory: bool = False,
     *,
     callback: Optional[Callable[[int], None]] = None,
 ) -> List[Cell]:
     """
     Parameters
     ----------
+
+    points: List of Cell objects
+        The potential cells to classify.
+    signal_array : numpy.ndarray or dask array
+        3D array representing the signal data in z, y, x order.
+    background_array : numpy.ndarray or dask array
+        3D array representing the signal data in z, y, x order.
+    n_free_cpus : int
+        How many CPU cores to leave free.
+    voxel_sizes : 3-tuple of floats
+        Size of your voxels in the z, y, and x dimensions.
+    network_voxel_sizes : 3-tuple of floats
+        Size of the pre-trained network's voxels in the z, y, and x dimensions.
+    batch_size : int
+        How many potential cells to classify at one time. The GPU/CPU
+        memory must be able to contain at once this many data cubes for
+        the models. For performance-critical applications, tune to maximize
+        memory usage without running out. Check your GPU/CPU memory to verify
+        it's not full.
+    cube_height: int
+        The height of the data cube centered on the cell used for
+        classification. Defaults to `50`.
+    cube_width: int
+        The width of the data cube centered on the cell used for
+        classification. Defaults to `50`.
+    cube_depth: int
+        The depth of the data cube centered on the cell used for
+        classification. Defaults to `20`.
+    trained_model : Optional[Path]
+        Trained model file path (home directory (default) -> pretrained
+        weights).
+    model_weights : Optional[Path]
+        Model weights path (home directory (default) -> pretrained
+        weights).
+    network_depth: str
+        The network depth to use during classification. Defaults to `"50"`.
+    max_workers: int
+        The number of sub-processes to use for data loading / processing.
+        Defaults to 8.
+    pin_memory: bool
+        Pins data to be sent to the GPU to the CPU memory. This allows faster
+        GPU data speeds, but can only be used if the data used by the GPU can
+        stay in the CPU RAM while the GPU uses it. I.e. there's enough RAM.
+        Otherwise, if there's a risk of the RAM being paged, it shouldn't be
+        used. Defaults to False.
     callback : Callable[int], optional
         A callback function that is called during classification. Called with
         the batch number once that batch has been classified.
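The new `pin_memory` flag (documented above, and wired into `VolumeFilter` further down) relies on PyTorch's page-locked host memory. A minimal PyTorch sketch of the mechanism itself, not cellfinder's internal code:

```python
# Pinned (page-locked) host memory enables faster, asynchronous
# host-to-GPU copies -- at the cost of RAM that cannot be paged out.
import torch

if torch.cuda.is_available():
    # Page-locked host allocation, as enabled by pin_memory=True.
    host_cubes = torch.empty((32, 50, 50, 20), pin_memory=True)
    # non_blocking=True only takes effect when the source tensor is pinned.
    gpu_cubes = host_cubes.to("cuda", non_blocking=True)
else:
    host_cubes = torch.empty((32, 50, 50, 20))  # pinning buys nothing on CPU
```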
@@ -70,7 +116,7 @@ def main(
     )
 
     if trained_model and Path(trained_model).suffix == ".h5":
-        print(
+        logger.warning(
             "Weights provided in place of the model, "
             "loading weights into default model."
         )
@@ -103,7 +149,7 @@ def main(
             points_list.append(cell)
 
     time_elapsed = datetime.now() - start_time
-    print(
+    logger.info(
         "Classfication complete - all points done in : {}".format(time_elapsed)
     )
 
@@ -47,9 +47,19 @@ def get_model(
             f"Setting model weights according to: {model_weights}",
         )
     if model_weights is None:
-        raise OSError("`model_weights` must be provided")
-    model.load_weights(model_weights)
-    return model
+        raise OSError(
+            "`model_weights` must be provided for inference "
+            "or continued training."
+        )
+    try:
+        model.load_weights(model_weights)
+    except (OSError, ValueError) as e:
+        raise ValueError(
+            f"Error loading weights: {model_weights}.\n"
+            "Provided weights don't match the model architecture.\n"
+        ) from e
+
+    return model
 
 
 def make_lists(
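The rewritten `get_model` converts backend weight-loading failures into a clear `ValueError`. The same pattern in isolation, with a toy Keras model and a hypothetical weights path:

```python
# The load-or-fail-clearly pattern from get_model, with a toy model.
# `weights_path` is hypothetical; cellfinder's real models differ.
import keras

model = keras.Sequential([keras.Input(shape=(8,)), keras.layers.Dense(4)])
weights_path = "model.weights.h5"  # hypothetical path

try:
    model.load_weights(weights_path)
except (OSError, ValueError) as e:
    # Chaining with `from e` keeps the backend's original traceback.
    raise ValueError(
        f"Error loading weights: {weights_path}.\n"
        "Provided weights don't match the model architecture.\n"
    ) from e
```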
@@ -48,12 +48,12 @@ def main(
     save_planes: bool = False,
     plane_directory: Optional[str] = None,
     batch_size: Optional[int] = None,
-    torch_device: str = "cpu",
-    use_scipy: bool = True,
-    split_ball_xy_size: int = 3,
-    split_ball_z_size: int = 3,
+    torch_device: Optional[str] = None,
+    pin_memory: bool = False,
+    split_ball_xy_size: float = 6,
+    split_ball_z_size: float = 15,
     split_ball_overlap_fraction: float = 0.8,
-    split_soma_diameter: int = 7,
+    n_splitting_iter: int = 10,
     *,
     callback: Optional[Callable[[int], None]] = None,
 ) -> List[Cell]:
@@ -62,69 +62,80 @@ def main(
 
     Parameters
     ----------
-    signal_array : numpy.ndarray
-        3D array representing the signal data.
-
+    signal_array : numpy.ndarray or dask array
+        3D array representing the signal data in z, y, x order.
     start_plane : int
-        Index of the starting plane for detection.
-
+        First plane index to process (inclusive, to process a subset of the
+        data).
     end_plane : int
-        Index of the ending plane for detection.
-
-    voxel_sizes : Tuple[float, float, float]
-        Tuple of voxel sizes in each dimension (z, y, x).
-
+        Last plane index to process (exclusive, to process a subset of the
+        data).
+    voxel_sizes : 3-tuple of floats
+        Size of your voxels in the z, y, and x dimensions (microns).
     soma_diameter : float
-        Diameter of the soma in physical units.
-
+        The expected in-plane (xy) soma diameter (microns).
     max_cluster_size : float
-        Maximum size of a cluster in physical units.
-
+        Largest detected cell cluster (in cubic um) where splitting
+        should be attempted. Clusters above this size will be labeled
+        as artifacts.
     ball_xy_size : float
-        Size of the XY ball used for filtering in physical units.
-
+        3d filter's in-plane (xy) filter ball size (microns).
     ball_z_size : float
-        Size of the Z ball used for filtering in physical units.
-
+        3d filter's axial (z) filter ball size (microns).
     ball_overlap_fraction : float
-        Fraction of overlap allowed between balls.
-
+        3d filter's fraction of the ball filter needed to be filled by
+        foreground voxels, centered on a voxel, to retain the voxel.
     soma_spread_factor : float
-        Spread factor for soma size.
-
+        Cell spread factor for determining the largest cell volume before
+        splitting up cell clusters. Structures with spherical volume of
+        diameter `soma_spread_factor * soma_diameter` or less will not be
+        split.
     n_free_cpus : int
-        Number of free CPU cores available for parallel processing.
-
+        How many CPU cores to leave free.
     log_sigma_size : float
-        Size of the sigma for the log filter.
-
+        Gaussian filter width (as a fraction of soma diameter) used during
+        2d in-plane Laplacian of Gaussian filtering.
     n_sds_above_mean_thresh : float
-        Number of standard deviations above the mean threshold.
-
+        Intensity threshold (the number of standard deviations above
+        the mean) of the filtered 2d planes used to mark pixels as
+        foreground or background.
     outlier_keep : bool, optional
         Whether to keep outliers during detection. Defaults to False.
-
     artifact_keep : bool, optional
         Whether to keep artifacts during detection. Defaults to False.
-
     save_planes : bool, optional
         Whether to save the planes during detection. Defaults to False.
-
     plane_directory : str, optional
         Directory path to save the planes. Defaults to None.
-
-    batch_size : int, optional
-        The number of planes to process in each batch. Defaults to 1.
-        For CPU, there's no benefit for a larger batch size. Only a memory
-        usage increase. For CUDA, the larger the batch size the better the
-        performance. Until it fills up the GPU memory - after which it
-        becomes slower.
-
+    batch_size: int
+        The number of planes of the original data volume to process at
+        once. The GPU/CPU memory must be able to contain this many planes
+        for all the filters. For performance-critical applications, tune to
+        maximize memory usage without running out. Check your GPU/CPU memory
+        to verify it's not full.
     torch_device : str, optional
-        The device on which to run the computation. By default, it's "cpu".
-        To run on a gpu, specify the PyTorch device name, such as "cuda" to
-        run on the first GPU.
-
+        The device on which to run the computation. If not specified (None),
+        "cuda" will be used if a GPU is available, otherwise "cpu".
+        You can also manually specify "cuda" or "cpu".
+    pin_memory: bool
+        Pins data to be sent to the GPU to the CPU memory. This allows faster
+        GPU data speeds, but can only be used if the data used by the GPU can
+        stay in the CPU RAM while the GPU uses it. I.e. there's enough RAM.
+        Otherwise, if there's a risk of the RAM being paged, it shouldn't be
+        used. Defaults to False.
+    split_ball_xy_size: float
+        Similar to `ball_xy_size`, except the value to use for the 3d
+        filter during cluster splitting.
+    split_ball_z_size: float
+        Similar to `ball_z_size`, except the value to use for the 3d filter
+        during cluster splitting.
+    split_ball_overlap_fraction: float
+        Similar to `ball_overlap_fraction`, except the value to use for the
+        3d filter during cluster splitting.
+    n_splitting_iter: int
+        The number of iterations to run the 3d filtering on a cluster. Each
+        iteration reduces the cluster size by the voxels not retained in
+        the previous iteration.
     callback : Callable[int], optional
         A callback function that is called every time a plane has finished
         being processed. Called with the plane number that has finished.
@@ -132,9 +143,11 @@ def main(
     Returns
     -------
     List[Cell]
-        List of detected cells.
+        List of detected cell candidates.
     """
     start_time = datetime.now()
+    if torch_device is None:
+        torch_device = "cuda" if torch.cuda.is_available() else "cpu"
     if batch_size is None:
         if torch_device == "cpu":
             batch_size = 4
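`torch_device` changed from a hard-coded `"cpu"` default to `None`, resolved at runtime. The resolution logic as a standalone snippet (the GPU branch of the `batch_size` default lies outside this hunk, so it is not shown):

```python
# Standalone version of the new device default: prefer CUDA when available.
import torch

def resolve_device(torch_device=None):
    if torch_device is None:
        torch_device = "cuda" if torch.cuda.is_available() else "cpu"
    return torch_device.lower()
```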
@@ -155,6 +168,12 @@ def main(
     end_plane = min(len(signal_array), end_plane)
 
     torch_device = torch_device.lower()
+    # Use SciPy filtering on CPU (better performance); use PyTorch on GPU
+    if torch_device != "cuda":
+        use_scipy = True
+    else:
+        use_scipy = False
+
     batch_size = max(batch_size, 1)
     # brainmapper can pass them in as str
     voxel_sizes = list(map(float, voxel_sizes))
@@ -180,19 +199,16 @@ def main(
         plane_directory=plane_directory,
         batch_size=batch_size,
         torch_device=torch_device,
+        pin_memory=pin_memory,
+        n_splitting_iter=n_splitting_iter,
     )
 
     # replicate the settings specific to splitting, before we access anything
     # of the original settings, causing cached properties
     kwargs = dataclasses.asdict(settings)
-    kwargs["ball_z_size_um"] = split_ball_z_size * settings.z_pixel_size
-    kwargs["ball_xy_size_um"] = (
-        split_ball_xy_size * settings.in_plane_pixel_size
-    )
+    kwargs["ball_z_size_um"] = split_ball_z_size
+    kwargs["ball_xy_size_um"] = split_ball_xy_size
     kwargs["ball_overlap_fraction"] = split_ball_overlap_fraction
-    kwargs["soma_diameter_um"] = (
-        split_soma_diameter * settings.in_plane_pixel_size
-    )
     # always run on cpu because copying to gpu overhead is likely slower than
     # any benefit for detection on smallish volumes
     kwargs["torch_device"] = "cpu"
@@ -231,6 +247,5 @@ def main(
 
     time_elapsed = datetime.now() - start_time
     s = f"Detection complete. Found {len(cells)} cells in {time_elapsed}"
-    logger.debug(s)
-    print(s)
+    logger.info(s)
     return cells
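Taken together, the detection changes are user-visible: `use_scipy` is gone (selected automatically per device), `split_ball_xy_size`/`split_ball_z_size` are now given in microns rather than voxels (hence the new defaults of 6 and 15, matching the main filter), and `n_splitting_iter` replaces `split_soma_diameter`. A hedged call sketch follows; the module path `cellfinder.core.detect.detect` and the exact set of required arguments are assumptions to check against 1.8.0, though the keyword names follow the signature shown above.

```python
# Hedged usage sketch for the updated detection API (verify import path
# and required arguments against cellfinder 1.8.0).
import numpy as np
from cellfinder.core.detect import detect  # assumed module layout

signal = np.random.default_rng(0).poisson(5, (40, 256, 256)).astype(np.uint16)

cells = detect.main(
    signal_array=signal,
    start_plane=0,
    end_plane=len(signal),
    voxel_sizes=(5.0, 2.0, 2.0),   # microns, (z, y, x)
    soma_diameter=16.0,            # microns, in-plane
    max_cluster_size=100_000.0,    # cubic microns
    ball_xy_size=6.0,              # microns
    ball_z_size=15.0,              # microns
    ball_overlap_fraction=0.6,
    soma_spread_factor=1.4,
    n_free_cpus=2,
    log_sigma_size=0.2,
    n_sds_above_mean_thresh=10.0,
    torch_device=None,             # new default: auto-select "cuda"/"cpu"
    pin_memory=False,              # new flag
    split_ball_xy_size=6.0,        # now microns, previously voxels
    split_ball_z_size=15.0,        # now microns, previously voxels
    n_splitting_iter=10,           # replaces split_soma_diameter
)
```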
@@ -39,7 +39,7 @@ class TileProcessor:
         Number of standard deviations above the mean threshold to use for
         determining whether a voxel is bright.
     log_sigma_size : float
-        Size of the sigma for the gaussian filter.
+        Size of the Gaussian sigma for the Laplacian of Gaussian filtering.
     soma_diameter : float
         Diameter of the soma in voxels.
     torch_device: str
@@ -80,23 +80,28 @@ class DetectionSettings:
 
     voxel_sizes: Tuple[float, float, float] = (1.0, 1.0, 1.0)
     """
-    Tuple of voxel sizes in each dimension (z, y, x). We use this to convert
-    from `um` to pixel sizes.
+    Tuple of voxel sizes (microns) in each dimension (z, y, x). We use this
+    to convert from `um` to pixel sizes.
     """
 
     soma_spread_factor: float = 1.4
-    """Spread factor for soma size - how much it may stretch in the images."""
+    """
+    Cell spread factor for determining the largest cell volume before
+    splitting up cell clusters. Structures with spherical volume of
+    diameter `soma_spread_factor * soma_diameter` or less will not be
+    split.
+    """
 
     soma_diameter_um: float = 16
     """
-    Diameter of a typical soma in um. Bright areas larger than this will be
-    split.
+    Diameter of a typical soma in-plane (xy) in microns.
     """
 
     max_cluster_size_um3: float = 100_000
     """
-    Maximum size of a cluster (bright area) that will be processed, in um.
-    Larger bright areas are skipped as artifacts.
+    Largest detected cell cluster (in cubic um) where splitting
+    should be attempted. Clusters above this size will be labeled
+    as artifacts.
     """
 
     ball_xy_size_um: float = 6
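The `voxel_sizes` docstring describes the um-to-pixel conversion the settings perform. The arithmetic it implies, illustratively (not the package's internal code):

```python
# Illustrative um -> voxel conversion implied by the docstring above.
voxel_sizes = (5.0, 2.0, 2.0)  # microns per voxel in (z, y, x)
ball_xy_size_um = 6.0
ball_z_size_um = 15.0

in_plane_pixel_size = voxel_sizes[1]  # assuming square in-plane voxels
ball_xy_size_voxels = ball_xy_size_um / in_plane_pixel_size  # -> 3.0
ball_z_size_voxels = ball_z_size_um / voxel_sizes[0]         # -> 3.0
```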
@@ -116,17 +121,21 @@ class DetectionSettings:
 
     ball_overlap_fraction: float = 0.6
     """
-    Fraction of overlap between a bright area and the spherical kernel,
-    for the area to be considered a single ball.
+    Fraction of the 3d ball filter needed to be filled by foreground voxels,
+    centered on a voxel, to retain the voxel.
     """
 
     log_sigma_size: float = 0.2
-    """Size of the sigma for the 2d Gaussian filter."""
+    """
+    Gaussian filter width (as a fraction of soma diameter) used during
+    2d in-plane Laplacian of Gaussian filtering.
+    """
 
     n_sds_above_mean_thresh: float = 10
     """
-    Number of standard deviations above the mean intensity to use for a
-    threshold to define bright areas. Below it, it's not considered bright.
+    Intensity threshold (the number of standard deviations above
+    the mean) of the filtered 2d planes used to mark pixels as
+    foreground or background.
     """
 
     outlier_keep: bool = False
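The reworded `n_sds_above_mean_thresh` doc amounts to a per-plane intensity cutoff. As a NumPy sketch (illustrative, not the package's filtering code):

```python
# The foreground cutoff implied by n_sds_above_mean_thresh.
import numpy as np

filtered_plane = np.random.default_rng(0).normal(100.0, 10.0, (64, 64))
n_sds_above_mean_thresh = 10.0

threshold = (
    filtered_plane.mean() + n_sds_above_mean_thresh * filtered_plane.std()
)
foreground = filtered_plane > threshold  # boolean mask of bright pixels
```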
@@ -180,6 +189,14 @@ class DetectionSettings:
     to run on the first GPU.
     """
 
+    pin_memory: bool = False
+    """
+    Pins data to be sent to the GPU to the CPU memory. This allows faster GPU
+    data speeds, but can only be used if the data used by the GPU can stay in
+    the CPU RAM while the GPU uses it. I.e. there's enough RAM. Otherwise, if
+    there's a risk of the RAM being paged, it shouldn't be used.
+    """
+
     n_free_cpus: int = 2
     """
     Number of free CPU cores to keep available and not use during parallel
@@ -191,6 +208,8 @@ class DetectionSettings:
     """
     During the structure splitting phase we iteratively shrink the bright areas
     and re-filter with the 3d filter. This is the number of iterations to do.
+    Each iteration reduces the cluster size by the voxels not retained in the
+    previous iteration.
 
     This is a maximum because we also stop if there are no more structures left
     during any iteration.
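The added sentence pins down the shrinking behaviour: each pass keeps only the voxels the 3d filter retained and re-filters that smaller set. Schematically, with a toy stand-in for the real spherical filter:

```python
# Schematic of the shrink-and-refilter loop described above. toy_filter is
# a stand-in (drops the dimmest voxel each pass), not cellfinder's 3d filter.
def toy_filter(voxels):
    return sorted(voxels)[1:]

def split_iterations(cluster_voxels, n_splitting_iter=10):
    for _ in range(n_splitting_iter):
        retained = toy_filter(cluster_voxels)
        if not retained:
            break  # also stop once no structures are left
        cluster_voxels = retained
    return cluster_voxels

print(split_iterations([3, 7, 1, 9, 5], n_splitting_iter=3))  # [7, 9]
```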
@@ -78,11 +78,11 @@ class BallFilter:
     ----------
     plane_height, plane_width : int
         Height/width of the planes.
-    ball_xy_size : int
-        Diameter of the spherical kernel in the x/y dimensions.
-    ball_z_size : int
-        Diameter of the spherical kernel in the z dimension.
-        Equal to the number of planes stacked to filter
+    ball_xy_size : float
+        Diameter of the spherical kernel (in microns) in the x/y dimensions.
+    ball_z_size : float
+        Diameter of the spherical kernel in the z dimension in microns.
+        Determines the number of planes stacked to filter
         the central plane of the stack.
     overlap_fraction : float
         The fraction of pixels within the spherical kernel that
@@ -1,3 +1,4 @@
+from copy import copy
 from typing import List, Tuple, Type
 
 import numpy as np
@@ -224,6 +225,7 @@ def split_cells(
         where M is the number of individual cells and each centre is
         represented by its x, y, and z coordinates.
     """
+    settings = copy(settings)
    # these points are in x, y, z order columnwise, in absolute pixels
     orig_centre = get_structure_centre(cell_points)
 
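`split_cells` now shallow-copies its settings before use, so splitting-specific mutations (and cached properties triggered during splitting) cannot leak back to the caller's object. The hazard in miniature, with a toy dataclass:

```python
# Why copy first: mutating shared settings would leak overrides back to
# the caller. Toy dataclass, not cellfinder's DetectionSettings.
from copy import copy
from dataclasses import dataclass

@dataclass
class ToySettings:
    torch_device: str = "cuda"

original = ToySettings()
local = copy(original)       # shallow copy, as split_cells now does
local.torch_device = "cpu"   # splitting always runs on cpu

assert original.torch_device == "cuda"  # caller's settings untouched
```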
@@ -140,7 +140,7 @@ class VolumeFilter:
         tensor = torch.empty(
             (batch_size, *self.settings.plane_shape),
             dtype=torch_dtype,
-            pin_memory=not cpu,
+            pin_memory=not cpu and self.settings.pin_memory,
             device="cpu",
         )
 
@@ -6,6 +6,7 @@ import pooch
 from brainglobe_utils.general.config import get_config_obj
 
 from cellfinder import DEFAULT_CELLFINDER_DIRECTORY
+from cellfinder.core import logger
 from cellfinder.core.tools.source_files import (
     default_configuration_path,
     user_specific_configuration_path,
@@ -74,7 +75,7 @@ def amend_user_configuration(new_model_path=None) -> None:
     new_model_path : Path, optional
         The path to the new model configuration.
     """
-    print("(Over-)writing custom user configuration")
+    logger.info("(Over-)writing custom user configuration")
 
     original_config = default_configuration_path()
     new_config = user_specific_configuration_path()