cellmap-analyze 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. cellmap_analyze-0.0.1/LICENSE +28 -0
  2. cellmap_analyze-0.0.1/MANIFEST.in +1 -0
  3. cellmap_analyze-0.0.1/PKG-INFO +59 -0
  4. cellmap_analyze-0.0.1/README.md +24 -0
  5. cellmap_analyze-0.0.1/pyproject.toml +65 -0
  6. cellmap_analyze-0.0.1/setup.cfg +4 -0
  7. cellmap_analyze-0.0.1/setup.py +25 -0
  8. cellmap_analyze-0.0.1/src/cellmap_analyze/__init__.py +0 -0
  9. cellmap_analyze-0.0.1/src/cellmap_analyze/analyze/__init__.py +0 -0
  10. cellmap_analyze-0.0.1/src/cellmap_analyze/analyze/assign_to_cells.py +216 -0
  11. cellmap_analyze-0.0.1/src/cellmap_analyze/analyze/fit_lines_to_segmentations.py +157 -0
  12. cellmap_analyze-0.0.1/src/cellmap_analyze/analyze/measure.py +247 -0
  13. cellmap_analyze-0.0.1/src/cellmap_analyze/analyze/overlapping_voxels.py +203 -0
  14. cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/__init__.py +0 -0
  15. cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/bresenham3D.c +31879 -0
  16. cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/bresenham3D.pyx +231 -0
  17. cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/centers.cpp +12371 -0
  18. cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/centers.pyx +59 -0
  19. cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/impl/centers.hpp +67 -0
  20. cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/process_arrays.c +58764 -0
  21. cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/process_arrays.pyx +75 -0
  22. cellmap_analyze-0.0.1/src/cellmap_analyze/process/__init__.py +0 -0
  23. cellmap_analyze-0.0.1/src/cellmap_analyze/process/clean_connected_components.py +268 -0
  24. cellmap_analyze-0.0.1/src/cellmap_analyze/process/connected_components.py +620 -0
  25. cellmap_analyze-0.0.1/src/cellmap_analyze/process/contact_sites.py +223 -0
  26. cellmap_analyze-0.0.1/src/cellmap_analyze/process/fill_holes.py +273 -0
  27. cellmap_analyze-0.0.1/src/cellmap_analyze/process/filter_ids.py +192 -0
  28. cellmap_analyze-0.0.1/src/cellmap_analyze/process/mutex_watershed.py +337 -0
  29. cellmap_analyze-0.0.1/src/cellmap_analyze/util/__init__.py +0 -0
  30. cellmap_analyze-0.0.1/src/cellmap_analyze/util/block_util.py +59 -0
  31. cellmap_analyze-0.0.1/src/cellmap_analyze/util/dask_util.py +319 -0
  32. cellmap_analyze-0.0.1/src/cellmap_analyze/util/image_data_interface.py +256 -0
  33. cellmap_analyze-0.0.1/src/cellmap_analyze/util/information_holders.py +119 -0
  34. cellmap_analyze-0.0.1/src/cellmap_analyze/util/io_util.py +390 -0
  35. cellmap_analyze-0.0.1/src/cellmap_analyze/util/mask_util.py +204 -0
  36. cellmap_analyze-0.0.1/src/cellmap_analyze/util/measure_util.py +255 -0
  37. cellmap_analyze-0.0.1/src/cellmap_analyze/util/neuroglancer_util.py +75 -0
  38. cellmap_analyze-0.0.1/src/cellmap_analyze/util/zarr_util.py +113 -0
  39. cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/PKG-INFO +59 -0
  40. cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/SOURCES.txt +44 -0
  41. cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/dependency_links.txt +1 -0
  42. cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/entry_points.txt +9 -0
  43. cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/requires.txt +25 -0
  44. cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/top_level.txt +2 -0
  45. cellmap_analyze-0.0.1/src/cli/__init__.py +0 -0
  46. cellmap_analyze-0.0.1/src/cli/cli.py +106 -0
@@ -0,0 +1,28 @@
1
+ BSD 3-Clause License
2
+
3
+ Copyright (c) 2025, CellMap Project Team
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ 3. Neither the name of the copyright holder nor the names of its
16
+ contributors may be used to endorse or promote products derived from
17
+ this software without specific prior written permission.
18
+
19
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1 @@
1
+ include src/cellmap_analyze/cythonizing/*.pyx
@@ -0,0 +1,59 @@
1
+ Metadata-Version: 2.4
2
+ Name: cellmap-analyze
3
+ Version: 0.0.1
4
+ Summary: Code to perform analysis on segmentations like those produced by CellMap
5
+ Author-email: David Ackerman <ackermand@janelia.hhmi.org>
6
+ Maintainer-email: David Ackerman <ackermand@janelia.hhmi.org>
7
+ Requires-Python: <3.13,>=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: bokeh>=3.1.0
11
+ Requires-Dist: fastremap
12
+ Requires-Dist: funlib.geometry
13
+ Requires-Dist: funlib.persistence==0.3.0
14
+ Requires-Dist: numpy
15
+ Requires-Dist: pandas
16
+ Requires-Dist: connected-components-3d
17
+ Requires-Dist: dask[distributed]
18
+ Requires-Dist: dask-jobqueue
19
+ Requires-Dist: fastmorph
20
+ Requires-Dist: scipy
21
+ Requires-Dist: scikit-image
22
+ Requires-Dist: tensorstore
23
+ Requires-Dist: tqdm
24
+ Requires-Dist: zarr==2.18.5
25
+ Requires-Dist: numcodecs==0.13.0
26
+ Requires-Dist: mwatershed
27
+ Requires-Dist: pyarrow
28
+ Requires-Dist: neuroglancer
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest; extra == "dev"
31
+ Requires-Dist: pytest-cov; extra == "dev"
32
+ Requires-Dist: coverage; extra == "dev"
33
+ Requires-Dist: build; extra == "dev"
34
+ Dynamic: license-file
35
+
36
+ ![CI](https://github.com/janelia-cellmap/cellmap-analyze/actions/workflows/tests.yml/badge.svg)
37
+ [![codecov](https://codecov.io/gh/janelia-cellmap/cellmap-analyze/branch/refactor_for_release/graph/badge.svg)](https://codecov.io/gh/janelia-cellmap/cellmap-analyze)
38
+
39
+ # Tools For Analyzing Large 3D Datasets
40
+
41
+ This repository is a set of tools for processing and analyzing terabyte size 3D segmentation datasets using Dask. Processing tools involve reading in dataset(s) and outputting another - processed - dataset. These processing tools include the calculation of:
42
+
43
+ 1. `Connected Components`: includes thresholding and masking
44
+ 2. `Clean Connected Components`: tools for cleaning up an existing segmentation
45
+ 3. `Contact Sites`: includes setting a distance for contact sites
46
+ 4. `Filling holes`: fills holes in segmentations
47
+ 5. `Filtering ids`: filters segmented ids to remove unwanted ones.
48
+
49
+ In addition, there are also tools for analysis of the 3D datasets including:
50
+
51
+ 1. `Measurement`: measures a variety of properties of the segmented ids (volume, surface area, etc.) as well as properties of contact sites (volume, surface area, contacting objects, etc.).
52
+ 2. `Fitting lines to segmentations`: useful for cylindrical-type objects.
53
+ 3. `Assigning to cells`: Assigns objects to the cells they are in based on the center of mass locations of the cells.
54
+
55
+ TODO: Include a detailed description of installation and running the code
56
+
57
+
58
+ ### Acknowledgements
59
+ Code for finding centers was taken from [funlib.evaluate](https://github.com/funkelab/funlib.evaluate).
@@ -0,0 +1,24 @@
1
+ ![CI](https://github.com/janelia-cellmap/cellmap-analyze/actions/workflows/tests.yml/badge.svg)
2
+ [![codecov](https://codecov.io/gh/janelia-cellmap/cellmap-analyze/branch/refactor_for_release/graph/badge.svg)](https://codecov.io/gh/janelia-cellmap/cellmap-analyze)
3
+
4
+ # Tools For Analyzing Large 3D Datasets
5
+
6
+ This repository is a set of tools for processing and analyzing terabyte size 3D segmentation datasets using Dask. Processing tools involve reading in dataset(s) and outputting another - processed - dataset. These processing tools include the calculation of:
7
+
8
+ 1. `Connected Components`: includes thresholding and masking
9
+ 2. `Clean Connected Components`: tools for cleaning up an existing segmentation
10
+ 3. `Contact Sites`: includes setting a distance for contact sites
11
+ 4. `Filling holes`: fills holes in segmentations
12
+ 5. `Filtering ids`: filters segmented ids to remove unwanted ones.
13
+
14
+ In addition, there are also tools for analysis of the 3D datasets including:
15
+
16
+ 1. `Measurement`: measures a variety of properties of the segmented ids (volume, surface area, etc.) as well as properties of contact sites (volume, surface area, contacting objects, etc.).
17
+ 2. `Fitting lines to segmentations`: useful for cylindrical-type objects.
18
+ 3. `Assigning to cells`: Assigns objects to the cells they are in based on the center of mass locations of the cells.
19
+
20
+ TODO: Include a detailed description of installation and running the code
21
+
22
+
23
+ ### Acknowledgements
24
+ Code for finding centers was taken from [funlib.evaluate](https://github.com/funkelab/funlib.evaluate).
@@ -0,0 +1,65 @@
1
+ [project]
2
+ name = "cellmap-analyze"
3
+ version = "0.0.1"
4
+ description = "Code to perform analysis on segmentations like those produced by CellMap"
5
+ readme = "README.md"
6
+ license-files = ["LICENSE"]
7
+ requires-python = ">=3.10,<3.13"
8
+
9
+ authors = [
10
+ { name = "David Ackerman", email = "ackermand@janelia.hhmi.org" },
11
+ ]
12
+
13
+ maintainers = [
14
+ { name = "David Ackerman", email = "ackermand@janelia.hhmi.org" },
15
+ ]
16
+
17
+ dependencies = [
18
+ "bokeh>=3.1.0",
19
+ "fastremap",
20
+ "funlib.geometry",
21
+ "funlib.persistence==0.3.0",
22
+ "numpy",
23
+ "pandas",
24
+ "connected-components-3d",
25
+ "dask[distributed]",
26
+ "dask-jobqueue",
27
+ "fastmorph",
28
+ "scipy",
29
+ "scikit-image",
30
+ "tensorstore",
31
+ "tqdm",
32
+ "zarr==2.18.5",
33
+ "numcodecs==0.13.0",
34
+ "mwatershed",
35
+ "pyarrow",
36
+ "neuroglancer",
37
+ ]
38
+
39
+ [project.optional-dependencies]
40
+ dev = ["pytest", "pytest-cov", "coverage", "build"]
41
+
42
+ [project.scripts]
43
+ connected-components = "cli.cli:connected_components"
44
+ clean-connected-components = "cli.cli:clean_connected_components"
45
+ contact-sites = "cli.cli:contact_sites"
46
+ mutex-watershed = "cli.cli:mutex_watershed"
47
+ filter-ids = "cli.cli:filter_ids"
48
+ measure = "cli.cli:measure"
49
+ fit-lines-to-segmentations = "cli.cli:fit_lines_to_segmentations"
50
+ assign-to-cells = "cli.cli:assign_to_cells"
51
+
52
+ [tool.setuptools]
53
+ package-dir = { "" = "src" }
54
+
55
+ [tool.setuptools.packages.find]
56
+ where = ["src"]
57
+
58
+ [build-system]
59
+ requires = [
60
+ "setuptools>=61",
61
+ "wheel",
62
+ "Cython>=0.29",
63
+ "numpy"
64
+ ]
65
+ build-backend = "setuptools.build_meta"
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,25 @@
1
"""Build script that compiles the Cython extension modules of cellmap_analyze."""
from setuptools import setup, Extension
from Cython.Build import cythonize
import numpy as np

# Plain C extensions generated from their .pyx sources.
bresenham3d_extension = Extension(
    name="cellmap_analyze.cythonizing.bresenham3D",
    sources=["src/cellmap_analyze/cythonizing/bresenham3D.pyx"],
)
process_arrays_extension = Extension(
    name="cellmap_analyze.cythonizing.process_arrays",
    sources=["src/cellmap_analyze/cythonizing/process_arrays.pyx"],
)

# The centers extension is compiled as C++11 and is given the NumPy headers.
centers_extension = Extension(
    name="cellmap_analyze.cythonizing.centers",
    sources=["src/cellmap_analyze/cythonizing/centers.pyx"],
    language="c++",
    include_dirs=[np.get_include()],
    extra_compile_args=["-std=c++11"],
)

extensions = [
    bresenham3d_extension,
    process_arrays_extension,
    centers_extension,
]

# Project metadata is not passed here; only the extension modules are declared.
cythonized_extensions = cythonize(extensions, language_level="3")
setup(ext_modules=cythonized_extensions)
File without changes
@@ -0,0 +1,216 @@
1
+ # %%
2
+ from typing import Union, List
3
+ from cellmap_analyze.util import io_util
4
+ from cellmap_analyze.util.image_data_interface import (
5
+ ImageDataInterface,
6
+ )
7
+ import logging
8
+ import pandas as pd
9
+ import numpy as np
10
+ import os
11
+ from scipy import spatial
12
+ from tqdm import tqdm
13
+ import fastremap
14
+ import fastmorph
15
+
16
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)


class AssignToCells:
    """Assign objects listed in measurement CSVs to cells of a segmentation.

    Each input CSV is read into a DataFrame and gains a ``"Cell ID"`` column
    (and, for nearest-cell assignment, a ``"Cell Distance (nm)"`` column).
    The updated tables are written to ``output_path``.

    Args:
        organelle_csvs: One CSV path or a list of CSV paths. The CSVs must
            provide ``"COM Z (nm)"``, ``"COM Y (nm)"``, ``"COM X (nm)"`` and
            ``"Object ID"`` columns.
        cell_ds_path: Path of the cell segmentation, opened with
            ``ImageDataInterface``.
        output_path: Directory that receives the updated CSVs.
        cell_assignment_type: ``0`` assigns each object to the cell that
            contains its center of mass; any other value ``n`` assigns each
            object to its ``n`` nearest cells.
        iteration_distance_nm: Search-radius increment used by the
            nearest-cell search; the radius grows by this amount on every
            pass.
    """

    # Center-of-mass columns in the (z, y, x) order used to index the volume.
    _COM_COLUMNS = ["COM Z (nm)", "COM Y (nm)", "COM X (nm)"]

    def __init__(
        self,
        organelle_csvs: Union[str, List[str]],
        cell_ds_path: str,
        output_path: str,
        cell_assignment_type: int = 0,
        iteration_distance_nm=10_000,
    ):
        if isinstance(organelle_csvs, str):
            organelle_csvs = [organelle_csvs]

        self.organelle_info_dict = {}
        for organelle_csv in organelle_csvs:
            df = pd.read_csv(organelle_csv)
            if "Total Objects" in df.columns:
                # Drop the last two columns of the dataframe.
                # NOTE(review): presumably these are the summary columns
                # ("Total Objects" and one more) and are always last - confirm.
                df = df.iloc[:, :-2]
            self.organelle_info_dict[organelle_csv] = df

        self.cell_idi = ImageDataInterface(cell_ds_path)
        self.cell_assignment_type = cell_assignment_type
        self.output_path = output_path
        self.iteration_distance_nm = iteration_distance_nm

    @staticmethod
    def _com_voxel_indices(cell_idi, df):
        """Return ``(coms, inds)``: centers of mass in nm and their voxel indices."""
        coms = df[AssignToCells._COM_COLUMNS].to_numpy()
        # ndarray.astype is used instead of np.astype because the latter only
        # exists in NumPy >= 2.1 and numpy is not pinned by this package.
        inds = (coms // cell_idi.voxel_size).astype(int)
        return coms, inds

    @staticmethod
    def _in_bounds(cell_idi, inds):
        """Return a boolean mask of the rows of ``inds`` inside the cell domain."""
        return np.all(inds >= cell_idi.domain.inclusive_min, axis=1) & np.all(
            inds < cell_idi.domain.exclusive_max, axis=1
        )

    @staticmethod
    def assign_to_containing_cell(cell_idi, df):
        """Set ``df["Cell ID"]`` to the cell label found at each center of mass.

        ``df`` is modified in place. Rows whose center of mass lies outside
        the cell volume keep their existing ``"Cell ID"`` value (0 when the
        column has to be created here).
        """
        cell_data = cell_idi.to_ndarray_ts()
        # Already have top left corners aligned since in measure_util
        # get_region_properties we already center on voxel.
        _, inds = AssignToCells._com_voxel_indices(cell_idi, df)
        in_bounds = AssignToCells._in_bounds(cell_idi, inds)

        if "Cell ID" not in df.columns:
            # Without a default, out-of-bounds rows would become NaN and the
            # integer cast below would fail.
            df["Cell ID"] = 0

        # NOTE(review): indices are compared against domain.inclusive_min but
        # used unshifted to index cell_data; this assumes the domain starts
        # at 0 - confirm.
        valid = inds[in_bounds]
        df.loc[in_bounds, "Cell ID"] = cell_data[valid[:, 0], valid[:, 1], valid[:, 2]]
        df["Cell ID"] = df["Cell ID"].astype(int)

    @staticmethod
    def assign_to_n_nearest_cells(cell_idi, df, n, iteration_distance_nm):
        """Assign every row of ``df`` to its ``n`` nearest cells.

        Distances are measured from the center of mass to the nearest
        boundary voxel center of each cell; a center of mass inside a cell
        has distance 0 to that cell. The search radius starts at
        ``iteration_distance_nm`` and grows by that amount on every pass
        until each row has ``n`` cells.

        ``df`` is modified in place: ``"Cell ID"`` and ``"Cell Distance (nm)"``
        hold scalars when ``n == 1`` and lists of length ``n`` otherwise.

        Raises:
            ValueError: If fewer than ``n`` cells (or fewer than ``n`` cells
                with boundary voxels) exist in the segmentation.
        """
        coms, inds = AssignToCells._com_voxel_indices(cell_idi, df)
        cell_data = cell_idi.to_ndarray_ts()

        num_cells = len(fastremap.unique(cell_data[cell_data > 0]))
        if num_cells < n:
            raise ValueError(
                f"Number of unique cell ids in the segmentation ({num_cells}) is less than n ({n})."
                " Please choose a smaller n or use a different assignment method."
            )

        boundaries = cell_data - fastmorph.erode(cell_data, erode_border=False)
        unique_ids = fastremap.unique(boundaries[boundaries > 0])
        if len(unique_ids) < n:
            # Only cells with boundary voxels can become candidates; with
            # fewer than n of them the search below could never finish.
            raise ValueError(
                f"Number of cells with boundary voxels ({len(unique_ids)}) is less than n ({n})."
                " Please choose a smaller n or use a different assignment method."
            )

        # Every query point keeps its n best candidates; inf / 0 are the
        # placeholders for "not found yet".
        num_points = len(coms)
        closest_distances = np.full((num_points, n), np.inf)
        closest_ids = np.zeros((num_points, n), dtype=int)

        boundary_coords = np.argwhere(boundaries)
        boundary_ids = boundaries[
            boundary_coords[:, 0], boundary_coords[:, 1], boundary_coords[:, 2]
        ]

        # Label of the cell containing each center of mass (0 when outside
        # the volume). It does not depend on the loops below, so compute once.
        in_bounds = AssignToCells._in_bounds(cell_idi, inds)
        containing_ids = np.zeros(num_points, dtype=cell_data.dtype)
        valid = inds[in_bounds]
        containing_ids[in_bounds] = cell_data[valid[:, 0], valid[:, 1], valid[:, 2]]

        iteration = 0
        while True:
            # Only query points that still have an unfilled slot are updated.
            global_update_mask = np.any(np.isinf(closest_distances), axis=1)
            if not np.any(global_update_mask):
                break
            iteration += 1
            search_radius = iteration_distance_nm * iteration

            for unique_id in tqdm(unique_ids):
                # Skip points whose candidate list already holds this cell.
                already_has_id = np.any(closest_ids == unique_id, axis=1)
                update_mask = global_update_mask & ~already_has_id
                if not np.any(update_mask):
                    continue

                # Boundary voxel centers (+0.5) of this cell, in nm.
                coords = (
                    boundary_coords[boundary_ids == unique_id] + 0.5
                ) * cell_idi.voxel_size
                tree = spatial.KDTree(coords)
                current_distances, _ = tree.query(
                    coms[update_mask], distance_upper_bound=search_radius
                )

                # A center of mass inside this cell has distance 0 to it.
                current_distances[containing_ids[update_mask] == unique_id] = 0

                # n current candidates plus the new one: n + 1 per point.
                combined_distances = np.column_stack(
                    [closest_distances[update_mask], current_distances]
                )
                combined_ids = np.column_stack(
                    [
                        closest_ids[update_mask],
                        np.full(np.sum(update_mask), unique_id, dtype=int),
                    ]
                )

                # A stable sort keeps existing entries ahead of the new
                # candidate on ties. Otherwise a not-yet-found (inf) candidate
                # could displace a placeholder, be treated as "already
                # present" and never be queried again.
                sort_order = np.argsort(combined_distances, axis=1, kind="stable")
                sorted_distances = np.take_along_axis(
                    combined_distances, sort_order, axis=1
                )
                sorted_ids = np.take_along_axis(combined_ids, sort_order, axis=1)

                closest_distances[update_mask] = sorted_distances[:, :n]
                closest_ids[update_mask] = sorted_ids[:, :n]

        if n > 1:
            df["Cell ID"] = [row.tolist() for row in closest_ids]
            df["Cell Distance (nm)"] = [row.tolist() for row in closest_distances]
        else:
            df["Cell ID"] = closest_ids[:, 0]
            df["Cell Distance (nm)"] = closest_distances[:, 0]

    def assign_to_cells(self):
        """Add cell assignments to every loaded table except ``cell.csv``."""
        with io_util.Timing_Messager("Assigning objects to cells", logger):
            for organelle_csv, df in self.organelle_info_dict.items():
                if os.path.basename(organelle_csv) == "cell.csv":
                    continue

                df["Cell ID"] = 0
                if "er.csv" in organelle_csv:
                    # Tables matching "er.csv" use the object id as cell id.
                    df["Cell ID"] = df["Object ID"]
                elif self.cell_assignment_type == 0:
                    self.assign_to_containing_cell(self.cell_idi, df)
                else:
                    df["Cell Distance (nm)"] = 0
                    self.assign_to_n_nearest_cells(
                        self.cell_idi,
                        df,
                        self.cell_assignment_type,
                        self.iteration_distance_nm,
                    )

    def write_updated_csvs(self):
        """Write every table to ``output_path`` with an assignment-type suffix."""
        with io_util.Timing_Messager("Writing out updated dataframes", logger):
            os.makedirs(self.output_path, exist_ok=True)
            for csv_path, df in self.organelle_info_dict.items():
                csv_name = os.path.basename(csv_path.split(".csv")[0])
                output_path = self.output_path
                if csv_name.endswith("contacts"):  # pragma: no cover
                    output_path = self.output_path + "/contact_sites/"
                    os.makedirs(output_path, exist_ok=True)

                if self.cell_assignment_type == 0:
                    output_name = (
                        f"{output_path}/{csv_name}_assigned_to_containing_cell"
                    )
                elif self.cell_assignment_type == 1:
                    output_name = f"{output_path}/{csv_name}_assigned_to_nearest_cell"
                else:
                    output_name = f"{output_path}/{csv_name}_assigned_to_{self.cell_assignment_type}_nearest_cells"

                # In case the ids were converted to float.
                df["Object ID"] = df["Object ID"].astype(int)
                df.to_csv(output_name + ".csv", index=False)

    def get_cell_assignments(self):
        """Run the assignment and write the updated CSVs."""
        self.assign_to_cells()
        self.write_updated_csvs()
@@ -0,0 +1,157 @@
1
+ # %%
2
+ from funlib.geometry import Roi
3
+ import numpy as np
4
+ from cellmap_analyze.util.dask_util import (
5
+ dask_computer,
6
+ guesstimate_npartitions,
7
+ start_dask,
8
+ )
9
+ from cellmap_analyze.util import io_util
10
+ from cellmap_analyze.util.image_data_interface import ImageDataInterface
11
+ import logging
12
+ import pandas as pd
13
+ import dask.dataframe as dd
14
+ from cellmap_analyze.util.neuroglancer_util import write_out_annotations
15
+ import fastremap
16
+ import cc3d
17
+
18
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)


class FitLinesToSegmentations:
    """Fit a line segment to every object listed in a measurement CSV.

    For each row the object's bounding box is read from the segmentation, a
    line is fitted to the voxels of its largest connected component, and the
    segment end points are stored in ``"Line Start/End Z/Y/X (nm)"`` columns.

    Args:
        input_csv: CSV providing ``"Object ID"`` and ``"MIN/MAX Z/Y/X (nm)"``
            columns.
        input_path: Path of the segmentation, opened with
            ``ImageDataInterface``.
        output_csv: Destination CSV. Defaults to ``input_csv`` with a
            trailing ``.csv`` replaced by ``_lines.csv``.
        output_annotations_dir: If given, the fitted lines are also written
            there via ``write_out_annotations``.
        num_workers: Number of dask workers; ``1`` selects the
            single-threaded scheduler.
    """

    def __init__(
        self,
        input_csv,
        input_path,
        output_csv=None,
        output_annotations_dir=None,
        num_workers=8,
    ):
        self.df = pd.read_csv(input_csv)
        self.segmentation_idi = ImageDataInterface(input_path)
        self.voxel_size = self.segmentation_idi.voxel_size
        self.num_workers = num_workers
        self.output_csv = output_csv
        if self.output_csv is None:
            # Only strip a trailing ".csv". str.replace would also rewrite
            # ".csv" inside directory names, and would return the input path
            # itself (overwriting the input) when there is no ".csv" at all.
            self.output_csv = input_csv.removesuffix(".csv") + "_lines.csv"

        self.output_annotations_dir = output_annotations_dir
        self.compute_args = {}
        if self.num_workers == 1:
            self.compute_args = {"scheduler": "single-threaded"}

    @staticmethod
    def find_min_max_projected_points(points, line_point, line_direction):
        """Project ``points`` onto a line and return the two extreme projections.

        Args:
            points: ``(N, 3)`` array of coordinates.
            line_point: A point on the line.
            line_direction: Direction of the line; it is normalized here.

        Returns:
            Tuple ``(min_point, max_point)``: the projected points with the
            smallest and largest signed position along ``line_direction``.
        """
        line_direction = line_direction / np.linalg.norm(line_direction)

        # Signed position of every point along the line, relative to line_point.
        projection_scalars = np.sum((points - line_point) * line_direction, axis=1)
        projected_points = (
            line_point + projection_scalars[:, np.newaxis] * line_direction
        )

        return (
            projected_points[np.argmin(projection_scalars)],
            projected_points[np.argmax(projection_scalars)],
        )

    @staticmethod
    def fit_line_to_points(points, voxel_size, offset, line_origin):
        """Fit a line to voxel coordinates and return its end points.

        Args:
            points: ``(N, 3)`` array of voxel indices.
            voxel_size: Voxel size used to scale ``points``.
            offset: Offset added to the scaled points.
            line_origin: Point the fitted line passes through, in the same
                scaled coordinates.

        Returns:
            Tuple ``(start_point, end_point)`` in scaled coordinates.
        """
        # The direction must be computed in the same (scaled) space the
        # points are projected in. A direction taken from raw voxel indices
        # is wrong whenever the voxel size differs between axes.
        physical_points = np.asarray(np.asarray(points) * voxel_size + offset, dtype=float)
        _, _, vv = np.linalg.svd(
            physical_points - np.mean(physical_points, axis=0), full_matrices=False
        )
        line_direction = vv[0]

        # End points of the segment, so it can be written as neuroglancer
        # annotations.
        return FitLinesToSegmentations.find_min_max_projected_points(
            physical_points,
            line_origin,
            line_direction,
        )

    @staticmethod
    def fit_line_to_object(data, id, voxel_size, offset):
        """Fit a line to the largest 6-connected component of object ``id``.

        Args:
            data: Label volume containing the object.
            id: Label of the object inside ``data``.
            voxel_size: Voxel size used to scale voxel indices.
            offset: Offset of ``data``, added to the scaled indices.

        Returns:
            Tuple ``(start_point, end_point)`` of the fitted segment.

        Raises:
            ValueError: If ``data`` contains no voxel labelled ``id``.
        """
        # Only take the largest component.
        components = cc3d.connected_components(
            data == id, connectivity=6, binary_image=True
        )
        component_ids, counts = fastremap.unique(
            components[components > 0], return_counts=True
        )
        if len(component_ids) == 0:
            raise ValueError(f"Object ID {id} was not found in the provided data.")
        largest_component = component_ids[np.argmax(counts)]

        points = np.column_stack(np.where(components == largest_component))
        com = np.mean(points, axis=0) * voxel_size + offset
        return FitLinesToSegmentations.fit_line_to_points(
            points, voxel_size, offset, com
        )

    def fit_lines_to_objects(self, df):
        """Fit a line to every object in ``df`` and return the rows with end points.

        Args:
            df: Table with ``"Object ID"`` and ``"MIN/MAX Z/Y/X (nm)"`` columns.

        Returns:
            DataFrame with one row per input row (index reset) and the
            ``"Line Start/End Z/Y/X (nm)"`` columns filled in.
        """
        dims = ["Z", "Y", "X"]
        row_frames = []
        for _, row in df.iterrows():
            object_id = row["Object ID"]
            box_min = np.array([row[f"MIN {d} (nm)"] for d in dims])
            box_max = np.array([row[f"MAX {d} (nm)"] for d in dims])
            # Pad by one voxel on every side so the roi actually encompasses
            # the bounding box.
            roi = Roi(
                box_min - self.voxel_size, (box_max - box_min) + self.voxel_size * 2
            )
            data = self.segmentation_idi.to_ndarray_ts(roi)
            line_start, line_end = FitLinesToSegmentations.fit_line_to_object(
                data, object_id, self.voxel_size, roi.offset
            )

            row_frame = pd.DataFrame([row])
            for point_name, point_coords in zip(
                ["Start", "End"], [line_start, line_end]
            ):
                for dim_idx, dim in enumerate(dims):
                    row_frame[f"Line {point_name} {dim} (nm)"] = point_coords[dim_idx]
            row_frames.append(row_frame)

        if not row_frames:
            # pd.concat raises on an empty list; return an empty table with
            # the line columns instead.
            empty = df.copy()
            for point_name in ["Start", "End"]:
                for dim in dims:
                    empty[f"Line {point_name} {dim} (nm)"] = np.nan
            return empty

        return pd.concat(row_frames, ignore_index=True)

    def get_fit_lines_to_segmentations(self):
        """Fit lines for all objects with dask and write the CSV and annotations."""
        # Append the result columns with default values so the table has its
        # final set of columns before it is partitioned.
        for s_e in ["Start", "End"]:
            for dim in ["Z", "Y", "X"]:
                self.df[f"Line {s_e} {dim} (nm)"] = np.nan

        ddf = dd.from_pandas(
            self.df, npartitions=guesstimate_npartitions(self.df, self.num_workers)
        )

        meta = pd.DataFrame(columns=self.df.columns)
        ddf_out = ddf.map_partitions(self.fit_lines_to_objects, meta=meta)
        with start_dask(self.num_workers, "line fits", logger):
            with io_util.Timing_Messager("Fitting lines", logger):
                df = dask_computer(ddf_out, self.num_workers, **self.compute_args)
        df["Object ID"] = df["Object ID"].astype(int)
        df.to_csv(self.output_csv, index=False)

        if self.output_annotations_dir is not None:
            with io_util.Timing_Messager("Writing annotations", logger):
                cols = [f"Line Start {d} (nm)" for d in ["Z", "Y", "X"]] + [
                    f"Line End {d} (nm)" for d in ["Z", "Y", "X"]
                ]
                write_out_annotations(
                    self.output_annotations_dir,
                    df["Object ID"].values,
                    df[cols].to_numpy(),
                )