cellmap-analyze 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cellmap_analyze-0.0.1/LICENSE +28 -0
- cellmap_analyze-0.0.1/MANIFEST.in +1 -0
- cellmap_analyze-0.0.1/PKG-INFO +59 -0
- cellmap_analyze-0.0.1/README.md +24 -0
- cellmap_analyze-0.0.1/pyproject.toml +65 -0
- cellmap_analyze-0.0.1/setup.cfg +4 -0
- cellmap_analyze-0.0.1/setup.py +25 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/__init__.py +0 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/analyze/__init__.py +0 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/analyze/assign_to_cells.py +216 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/analyze/fit_lines_to_segmentations.py +157 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/analyze/measure.py +247 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/analyze/overlapping_voxels.py +203 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/__init__.py +0 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/bresenham3D.c +31879 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/bresenham3D.pyx +231 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/centers.cpp +12371 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/centers.pyx +59 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/impl/centers.hpp +67 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/process_arrays.c +58764 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/cythonizing/process_arrays.pyx +75 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/process/__init__.py +0 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/process/clean_connected_components.py +268 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/process/connected_components.py +620 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/process/contact_sites.py +223 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/process/fill_holes.py +273 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/process/filter_ids.py +192 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/process/mutex_watershed.py +337 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/util/__init__.py +0 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/util/block_util.py +59 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/util/dask_util.py +319 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/util/image_data_interface.py +256 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/util/information_holders.py +119 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/util/io_util.py +390 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/util/mask_util.py +204 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/util/measure_util.py +255 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/util/neuroglancer_util.py +75 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze/util/zarr_util.py +113 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/PKG-INFO +59 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/SOURCES.txt +44 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/dependency_links.txt +1 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/entry_points.txt +9 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/requires.txt +25 -0
- cellmap_analyze-0.0.1/src/cellmap_analyze.egg-info/top_level.txt +2 -0
- cellmap_analyze-0.0.1/src/cli/__init__.py +0 -0
- cellmap_analyze-0.0.1/src/cli/cli.py +106 -0
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
BSD 3-Clause License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025, CellMap Project Team
|
|
4
|
+
|
|
5
|
+
Redistribution and use in source and binary forms, with or without
|
|
6
|
+
modification, are permitted provided that the following conditions are met:
|
|
7
|
+
|
|
8
|
+
1. Redistributions of source code must retain the above copyright notice, this
|
|
9
|
+
list of conditions and the following disclaimer.
|
|
10
|
+
|
|
11
|
+
2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12
|
+
this list of conditions and the following disclaimer in the documentation
|
|
13
|
+
and/or other materials provided with the distribution.
|
|
14
|
+
|
|
15
|
+
3. Neither the name of the copyright holder nor the names of its
|
|
16
|
+
contributors may be used to endorse or promote products derived from
|
|
17
|
+
this software without specific prior written permission.
|
|
18
|
+
|
|
19
|
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
20
|
+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
21
|
+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22
|
+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
|
23
|
+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24
|
+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25
|
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26
|
+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27
|
+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28
|
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
include src/cellmap_analyze/cythonizing/*.pyx
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cellmap-analyze
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: Code to perform analysis on segmentations like those produced by CellMap
|
|
5
|
+
Author-email: David Ackerman <ackermand@janelia.hhmi.org>
|
|
6
|
+
Maintainer-email: David Ackerman <ackermand@janelia.hhmi.org>
|
|
7
|
+
Requires-Python: <3.13,>=3.10
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Requires-Dist: bokeh>=3.1.0
|
|
11
|
+
Requires-Dist: fastremap
|
|
12
|
+
Requires-Dist: funlib.geometry
|
|
13
|
+
Requires-Dist: funlib.persistence==0.3.0
|
|
14
|
+
Requires-Dist: numpy
|
|
15
|
+
Requires-Dist: pandas
|
|
16
|
+
Requires-Dist: connected-components-3d
|
|
17
|
+
Requires-Dist: dask[distributed]
|
|
18
|
+
Requires-Dist: dask-jobqueue
|
|
19
|
+
Requires-Dist: fastmorph
|
|
20
|
+
Requires-Dist: scipy
|
|
21
|
+
Requires-Dist: scikit-image
|
|
22
|
+
Requires-Dist: tensorstore
|
|
23
|
+
Requires-Dist: tqdm
|
|
24
|
+
Requires-Dist: zarr==2.18.5
|
|
25
|
+
Requires-Dist: numcodecs==0.13.0
|
|
26
|
+
Requires-Dist: mwatershed
|
|
27
|
+
Requires-Dist: pyarrow
|
|
28
|
+
Requires-Dist: neuroglancer
|
|
29
|
+
Provides-Extra: dev
|
|
30
|
+
Requires-Dist: pytest; extra == "dev"
|
|
31
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
32
|
+
Requires-Dist: coverage; extra == "dev"
|
|
33
|
+
Requires-Dist: build; extra == "dev"
|
|
34
|
+
Dynamic: license-file
|
|
35
|
+
|
|
36
|
+

|
|
37
|
+
[codecov](https://codecov.io/gh/janelia-cellmap/cellmap-analyze)
|
|
38
|
+
|
|
39
|
+
# Tools For Analyzing Large 3D Datasets
|
|
40
|
+
|
|
41
|
+
This repository is a set of tools for processing and analyzing terabyte size 3D segmentation datasets using Dask. Processing tools involve reading in dataset(s) and outputting another - processed - dataset. These processing tools include the calculation of:
|
|
42
|
+
|
|
43
|
+
1. `Connected Components`: includes thresholding and masking
|
|
44
|
+
2. `Clean Connected Components`: tools for cleaning up an existing segmentation
|
|
45
|
+
3. `Contact Sites`: includes setting a distance for contact sites
|
|
46
|
+
4. `Filling holes`: fills holes in segmentations
|
|
47
|
+
5. `Filtering ids`: filters segmented ids to remove unwanted ones.
|
|
48
|
+
|
|
49
|
+
In addition, there are also tools for analysis of the 3D datasets including:
|
|
50
|
+
|
|
51
|
+
1. `Measurement`: measures a variety of properties of the segmented ids (volume, surface area, etc.) as well as properties of contact sites (volume, surface area, contacting objects, etc.).
|
|
52
|
+
2. `Fitting lines to segmentations`: useful for cylindrical-type objects.
|
|
53
|
+
3. `Assigning to cells`: Assigns objects to the cells they are in based on the center of mass locations of the cells.
|
|
54
|
+
|
|
55
|
+
TODO: Include a detailed description of installation and running the code
|
|
56
|
+
</div>
|
|
57
|
+
|
|
58
|
+
### Acknowledgements
|
|
59
|
+
Code for finding centers was taken from [funlib.evaluate](https://github.com/funkelab/funlib.evaluate).
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+

|
|
2
|
+
[codecov](https://codecov.io/gh/janelia-cellmap/cellmap-analyze)
|
|
3
|
+
|
|
4
|
+
# Tools For Analyzing Large 3D Datasets
|
|
5
|
+
|
|
6
|
+
This repository is a set of tools for processing and analyzing terabyte size 3D segmentation datasets using Dask. Processing tools involve reading in dataset(s) and outputting another - processed - dataset. These processing tools include the calculation of:
|
|
7
|
+
|
|
8
|
+
1. `Connected Components`: includes thresholding and masking
|
|
9
|
+
2. `Clean Connected Components`: tools for cleaning up an existing segmentation
|
|
10
|
+
3. `Contact Sites`: includes setting a distance for contact sites
|
|
11
|
+
4. `Filling holes`: fills holes in segmentations
|
|
12
|
+
5. `Filtering ids`: filters segmented ids to remove unwanted ones.
|
|
13
|
+
|
|
14
|
+
In addition, there are also tools for analysis of the 3D datasets including:
|
|
15
|
+
|
|
16
|
+
1. `Measurement`: measures a variety of properties of the segmented ids (volume, surface area, etc.) as well as properties of contact sites (volume, surface area, contacting objects, etc.).
|
|
17
|
+
2. `Fitting lines to segmentations`: useful for cylindrical-type objects.
|
|
18
|
+
3. `Assigning to cells`: Assigns objects to the cells they are in based on the center of mass locations of the cells.
|
|
19
|
+
|
|
20
|
+
TODO: Include a detailed description of installation and running the code
|
|
21
|
+
</div>
|
|
22
|
+
|
|
23
|
+
### Acknowledgements
|
|
24
|
+
Code for finding centers was taken from [funlib.evaluate](https://github.com/funkelab/funlib.evaluate).
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "cellmap-analyze"
|
|
3
|
+
version = "0.0.1"
|
|
4
|
+
description = "Code to perform analysis on segmentations like those produced by CellMap"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license-files = ["LICENSE"]
|
|
7
|
+
requires-python = ">=3.10,<3.13"
|
|
8
|
+
|
|
9
|
+
authors = [
|
|
10
|
+
{ name = "David Ackerman", email = "ackermand@janelia.hhmi.org" },
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
maintainers = [
|
|
14
|
+
{ name = "David Ackerman", email = "ackermand@janelia.hhmi.org" },
|
|
15
|
+
]
|
|
16
|
+
|
|
17
|
+
dependencies = [
|
|
18
|
+
"bokeh>=3.1.0",
|
|
19
|
+
"fastremap",
|
|
20
|
+
"funlib.geometry",
|
|
21
|
+
"funlib.persistence==0.3.0",
|
|
22
|
+
"numpy",
|
|
23
|
+
"pandas",
|
|
24
|
+
"connected-components-3d",
|
|
25
|
+
"dask[distributed]",
|
|
26
|
+
"dask-jobqueue",
|
|
27
|
+
"fastmorph",
|
|
28
|
+
"scipy",
|
|
29
|
+
"scikit-image",
|
|
30
|
+
"tensorstore",
|
|
31
|
+
"tqdm",
|
|
32
|
+
"zarr==2.18.5",
|
|
33
|
+
"numcodecs==0.13.0",
|
|
34
|
+
"mwatershed",
|
|
35
|
+
"pyarrow",
|
|
36
|
+
"neuroglancer",
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[project.optional-dependencies]
|
|
40
|
+
dev = ["pytest", "pytest-cov", "coverage", "build"]
|
|
41
|
+
|
|
42
|
+
[project.scripts]
|
|
43
|
+
connected-components = "cli.cli:connected_components"
|
|
44
|
+
clean-connected-components = "cli.cli:clean_connected_components"
|
|
45
|
+
contact-sites = "cli.cli:contact_sites"
|
|
46
|
+
mutex-watershed = "cli.cli:mutex_watershed"
|
|
47
|
+
filter-ids = "cli.cli:filter_ids"
|
|
48
|
+
measure = "cli.cli:measure"
|
|
49
|
+
fit-lines-to-segmentations = "cli.cli:fit_lines_to_segmentations"
|
|
50
|
+
assign-to-cells = "cli.cli:assign_to_cells"
|
|
51
|
+
|
|
52
|
+
[tool.setuptools]
|
|
53
|
+
package-dir = { "" = "src" }
|
|
54
|
+
|
|
55
|
+
[tool.setuptools.packages.find]
|
|
56
|
+
where = ["src"]
|
|
57
|
+
|
|
58
|
+
[build-system]
|
|
59
|
+
requires = [
|
|
60
|
+
"setuptools>=61",
|
|
61
|
+
"wheel",
|
|
62
|
+
"Cython>=0.29",
|
|
63
|
+
"numpy"
|
|
64
|
+
]
|
|
65
|
+
build-backend = "setuptools.build_meta"
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from setuptools import setup, Extension
|
|
2
|
+
from Cython.Build import cythonize
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
# One entry per Cython extension: (importable module name, .pyx source,
# extra keyword arguments forwarded to setuptools.Extension).
_EXTENSION_SPECS = (
    (
        "cellmap_analyze.cythonizing.bresenham3D",
        "src/cellmap_analyze/cythonizing/bresenham3D.pyx",
        {},
    ),
    (
        "cellmap_analyze.cythonizing.process_arrays",
        "src/cellmap_analyze/cythonizing/process_arrays.pyx",
        {},
    ),
    (
        # Only this extension is built as C++ (C++11) with the NumPy headers
        # on its include path.
        "cellmap_analyze.cythonizing.centers",
        "src/cellmap_analyze/cythonizing/centers.pyx",
        {
            "language": "c++",
            "include_dirs": [np.get_include()],
            "extra_compile_args": ["-std=c++11"],
        },
    ),
)

extensions = [
    Extension(module_name, [pyx_source], **build_options)
    for module_name, pyx_source, build_options in _EXTENSION_SPECS
]

# All remaining project metadata is declared in pyproject.toml; setup() only
# needs to register the cythonized extension modules.
setup(ext_modules=cythonize(extensions, language_level="3"))
|
|
File without changes
|
|
File without changes
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
# %%
|
|
2
|
+
from typing import Union, List
|
|
3
|
+
from cellmap_analyze.util import io_util
|
|
4
|
+
from cellmap_analyze.util.image_data_interface import (
|
|
5
|
+
ImageDataInterface,
|
|
6
|
+
)
|
|
7
|
+
import logging
|
|
8
|
+
import pandas as pd
|
|
9
|
+
import numpy as np
|
|
10
|
+
import os
|
|
11
|
+
from scipy import spatial
|
|
12
|
+
from tqdm import tqdm
|
|
13
|
+
import fastremap
|
|
14
|
+
import fastmorph
|
|
15
|
+
|
|
16
|
+
# Configure the root logger at import time (timestamped records, INFO and
# above), then create the logger used throughout this module.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)-8s %(message)s",
)
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class AssignToCells:
    """Assign objects listed in organelle measurement CSVs to cells.

    Every input CSV is loaded into a DataFrame.  ``get_cell_assignments``
    adds a ``Cell ID`` column (plus ``Cell Distance (nm)`` for the
    nearest-cell modes) and writes the updated tables below ``output_path``.

    ``cell_assignment_type`` selects the mode: ``0`` looks up the cell label
    found at each object's center of mass; any other value ``n`` records the
    ``n`` nearest cells and the distance to each of them.
    """

    def __init__(
        self,
        organelle_csvs: Union[str, List[str]],
        cell_ds_path: str,
        output_path: str,
        cell_assignment_type: int = 0,
        iteration_distance_nm=10_000,
    ):
        """Load the CSVs and open the cell segmentation.

        Args:
            organelle_csvs: One CSV path or a list of CSV paths.
            cell_ds_path: Path handed to ``ImageDataInterface`` for the cell
                segmentation.
            output_path: Directory the updated CSVs are written to.
            cell_assignment_type: ``0`` for containing-cell assignment,
                otherwise the number of nearest cells to record.
            iteration_distance_nm: Search radius used by the nearest-cell
                search; it is multiplied by the pass number on every pass.
        """
        if isinstance(organelle_csvs, str):
            organelle_csvs = [organelle_csvs]

        self.organelle_info_dict = {}
        for organelle_csv in organelle_csvs:
            df = pd.read_csv(organelle_csv)
            if "Total Objects" in df.columns:
                # Drop the last two columns of the dataframe.
                # NOTE(review): presumably these are summary columns written
                # by the measurement step -- confirm against the CSV writer.
                df = df.iloc[:, :-2]
            self.organelle_info_dict[organelle_csv] = df

        self.cell_idi = ImageDataInterface(cell_ds_path)
        self.cell_assignment_type = cell_assignment_type
        self.output_path = output_path
        self.iteration_distance_nm = iteration_distance_nm

    @staticmethod
    def _com_voxel_indices(cell_idi, df):
        """Return ``(coms, inds)``: COMs in nm (Z, Y, X) and their voxel indices."""
        coms = df[["COM Z (nm)", "COM Y (nm)", "COM X (nm)"]].to_numpy()
        # Use the ndarray method rather than the module-level ``np.astype``,
        # which is only available in recent NumPy releases.
        inds = (coms // cell_idi.voxel_size).astype(int)
        return coms, inds

    @staticmethod
    def _in_domain(cell_idi, inds):
        """Return a boolean mask of the rows of ``inds`` inside ``cell_idi.domain``."""
        above_min = np.all(inds >= cell_idi.domain.inclusive_min, axis=1)
        below_max = np.all(inds < cell_idi.domain.exclusive_max, axis=1)
        return above_min & below_max

    @staticmethod
    def assign_to_containing_cell(cell_idi, df):
        """Set ``df["Cell ID"]`` to the cell label at each object's center of mass.

        ``df`` is modified in place and must already contain a ``Cell ID``
        column: rows whose center of mass lies outside the cell volume keep
        their existing value.

        Args:
            cell_idi: Object exposing ``to_ndarray_ts()``, ``voxel_size`` and
                ``domain.inclusive_min`` / ``domain.exclusive_max``.
            df: DataFrame with ``COM Z/Y/X (nm)`` columns.
        """
        cell_data = cell_idi.to_ndarray_ts()
        # Per the original author: the top-left corners are already aligned
        # because measure_util's get_region_properties centers on the voxel,
        # so a floor division gives the containing voxel.
        _, inds = AssignToCells._com_voxel_indices(cell_idi, df)
        in_bounds = AssignToCells._in_domain(cell_idi, inds)

        # NOTE(review): indices are used directly on the array, which assumes
        # the domain starts at the array origin -- confirm for offset volumes.
        df.loc[in_bounds, "Cell ID"] = cell_data[
            inds[in_bounds, 0], inds[in_bounds, 1], inds[in_bounds, 2]
        ]
        df["Cell ID"] = df["Cell ID"].astype(int)

    @staticmethod
    def assign_to_n_nearest_cells(cell_idi, df, n, iteration_distance_nm):
        """Record the ``n`` nearest cells (and distances) for every object in ``df``.

        Distances are measured from each center of mass to the boundary
        voxels of every cell; a center of mass lying inside a cell gets
        distance 0 to that cell.  The search radius starts at
        ``iteration_distance_nm`` and grows by that amount on every pass until
        each object has ``n`` finite distances.

        ``df`` is modified in place: ``Cell ID`` and ``Cell Distance (nm)``
        hold scalars when ``n == 1`` and per-row lists otherwise.

        Raises:
            ValueError: If the segmentation holds fewer than ``n`` cells, or
                fewer than ``n`` cells have boundary voxels (the search could
                then never finish).
        """
        coms, inds = AssignToCells._com_voxel_indices(cell_idi, df)
        cell_data = cell_idi.to_ndarray_ts()

        num_cells = len(fastremap.unique(cell_data[cell_data > 0]))
        if num_cells < n:
            raise ValueError(
                f"Number of unique cell ids in the segmentation ({num_cells}) is less than n ({n})."
                " Please choose a smaller n or use a different assignment method."
            )

        num_points = len(coms)
        # Boundary voxels keep their label; interior and background become 0.
        boundaries = cell_data - fastmorph.erode(cell_data, erode_border=False)
        unique_ids = fastremap.unique(boundaries[boundaries > 0])
        if num_points > 0 and len(unique_ids) < n:
            # Only ids with boundary voxels can ever become candidates, so the
            # search loop below would otherwise spin forever.
            raise ValueError(
                f"Only {len(unique_ids)} cell ids have boundary voxels, which is less than n ({n})."
                " Please choose a smaller n or use a different assignment method."
            )

        # Every query point tracks its n best candidates; np.inf marks a slot
        # that has not been filled yet.
        closest_distances = np.full((num_points, n), np.inf)
        closest_ids = np.zeros((num_points, n), dtype=int)

        boundary_coords = np.argwhere(boundaries)
        boundary_ids = boundaries[
            boundary_coords[:, 0], boundary_coords[:, 1], boundary_coords[:, 2]
        ]

        # Cell label under every center of mass (0 when outside the volume).
        # This does not depend on the loop variables, so compute it once.
        in_bounds = AssignToCells._in_domain(cell_idi, inds)
        valid_inds = inds[in_bounds]
        com_cell_ids = np.zeros(num_points, dtype=cell_data.dtype)
        com_cell_ids[in_bounds] = cell_data[
            valid_inds[:, 0], valid_inds[:, 1], valid_inds[:, 2]
        ]

        iteration = 0
        while True:
            # Query points that still have at least one unfilled slot.
            global_update_mask = np.isinf(closest_distances).any(axis=1)
            if not global_update_mask.any():
                break
            iteration += 1
            search_radius = iteration_distance_nm * iteration

            for unique_id in tqdm(unique_ids):
                # Skip points that are complete or already list this cell.
                already_has_id = np.any(closest_ids == unique_id, axis=1)
                update_mask = global_update_mask & ~already_has_id
                if not np.any(update_mask):
                    continue

                # Boundary voxel centers of this cell in nm (+0.5 centers the voxel).
                coords = (
                    boundary_coords[boundary_ids == unique_id] + 0.5
                ) * cell_idi.voxel_size
                tree = spatial.KDTree(coords)

                # Points farther away than the radius come back as np.inf.
                current_distances, _ = tree.query(
                    coms[update_mask], distance_upper_bound=search_radius
                )

                # A center of mass inside this cell is at distance 0 from it.
                within_cell = com_cell_ids[update_mask] == unique_id
                current_distances[within_cell] = 0

                # n current candidates + 1 new candidate per query point.
                combined_distances = np.column_stack(
                    [closest_distances[update_mask], current_distances]
                )
                combined_ids = np.column_stack(
                    [
                        closest_ids[update_mask],
                        np.full(np.sum(update_mask), unique_id, dtype=int),
                    ]
                )

                # Stable sort: on ties (in particular inf vs inf) the existing
                # entries stay in front, so an out-of-range candidate never
                # occupies a slot and gets retried with a larger radius.
                sort_order = np.argsort(combined_distances, axis=1, kind="stable")
                sorted_distances = np.take_along_axis(
                    combined_distances, sort_order, axis=1
                )
                sorted_ids = np.take_along_axis(combined_ids, sort_order, axis=1)

                closest_distances[update_mask] = sorted_distances[:, :n]
                closest_ids[update_mask] = sorted_ids[:, :n]

        # Update the DataFrame columns.
        if n > 1:
            df["Cell ID"] = [row.tolist() for row in closest_ids]
            df["Cell Distance (nm)"] = [row.tolist() for row in closest_distances]
        else:
            df["Cell ID"] = closest_ids[:, 0]
            df["Cell Distance (nm)"] = closest_distances[:, 0]

    def assign_to_cells(self):
        """Add cell assignments to every loaded DataFrame (in place).

        Files named ``cell.csv`` are left untouched.  For paths containing
        ``er.csv`` the ``Cell ID`` is copied from ``Object ID``.  Everything
        else is assigned according to ``cell_assignment_type``.
        """
        with io_util.Timing_Messager("Assigning objects to cells", logger):
            for organelle_csv, df in self.organelle_info_dict.items():
                filename = os.path.basename(organelle_csv)
                if filename == "cell.csv":
                    continue

                df["Cell ID"] = 0
                if "er.csv" in organelle_csv:
                    df["Cell ID"] = df["Object ID"]
                    continue

                if self.cell_assignment_type == 0:
                    self.assign_to_containing_cell(self.cell_idi, df)
                    continue

                df["Cell Distance (nm)"] = 0
                self.assign_to_n_nearest_cells(
                    self.cell_idi,
                    df,
                    self.cell_assignment_type,
                    self.iteration_distance_nm,
                )

    def write_updated_csvs(self):
        """Write every DataFrame to ``output_path`` with a mode-specific suffix.

        Tables whose name ends in ``contacts`` go into a ``contact_sites``
        subdirectory.  ``Object ID`` is cast back to int before writing.
        """
        with io_util.Timing_Messager("Writing out updated dataframes", logger):
            os.makedirs(self.output_path, exist_ok=True)
            for csv_path, df in self.organelle_info_dict.items():
                csv_name = os.path.basename(csv_path.split(".csv")[0])
                output_path = self.output_path
                if csv_name.endswith("contacts"):  # pragma: no cover
                    output_path = self.output_path + "/contact_sites/"
                    os.makedirs(output_path, exist_ok=True)

                if self.cell_assignment_type == 0:
                    output_name = (
                        f"{output_path}/{csv_name}_assigned_to_containing_cell"
                    )
                elif self.cell_assignment_type == 1:
                    output_name = f"{output_path}/{csv_name}_assigned_to_nearest_cell"
                else:
                    output_name = f"{output_path}/{csv_name}_assigned_to_{self.cell_assignment_type}_nearest_cells"

                # In case the column was converted to float along the way.
                df["Object ID"] = df["Object ID"].astype(int)
                df.to_csv(output_name + ".csv", index=False)

    def get_cell_assignments(self):
        """Run the assignment and write the resulting CSVs."""
        self.assign_to_cells()
        self.write_updated_csvs()
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
# %%
|
|
2
|
+
from funlib.geometry import Roi
|
|
3
|
+
import numpy as np
|
|
4
|
+
from cellmap_analyze.util.dask_util import (
|
|
5
|
+
dask_computer,
|
|
6
|
+
guesstimate_npartitions,
|
|
7
|
+
start_dask,
|
|
8
|
+
)
|
|
9
|
+
from cellmap_analyze.util import io_util
|
|
10
|
+
from cellmap_analyze.util.image_data_interface import ImageDataInterface
|
|
11
|
+
import logging
|
|
12
|
+
import pandas as pd
|
|
13
|
+
import dask.dataframe as dd
|
|
14
|
+
from cellmap_analyze.util.neuroglancer_util import write_out_annotations
|
|
15
|
+
import fastremap
|
|
16
|
+
import cc3d
|
|
17
|
+
|
|
18
|
+
# Configure the root logger at import time (timestamped records, INFO and
# above), then create the logger used throughout this module.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)-8s %(message)s",
)
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class FitLinesToSegmentations:
    """Fit a straight line segment to every object listed in a measurement CSV.

    For each row the object's bounding box is read from the segmentation, a
    line is fitted to the voxels of its largest connected component, and the
    segment end points are stored in ``Line Start/End Z/Y/X (nm)`` columns.
    The result is written to a CSV and, optionally, as annotations.
    """

    def __init__(
        self,
        input_csv,
        input_path,
        output_csv=None,
        output_annotations_dir=None,
        num_workers=8,
    ):
        """Load the table and open the segmentation.

        Args:
            input_csv: CSV with ``Object ID`` and ``MIN/MAX Z/Y/X (nm)`` columns.
            input_path: Path handed to ``ImageDataInterface`` for the segmentation.
            output_csv: Destination CSV; defaults to ``input_csv`` with
                ``.csv`` replaced by ``_lines.csv``.
            output_annotations_dir: If given, annotations are written there.
            num_workers: Number of dask workers; ``1`` selects the
                single-threaded scheduler.
        """
        self.df = pd.read_csv(input_csv)
        self.segmentation_idi = ImageDataInterface(input_path)
        self.voxel_size = self.segmentation_idi.voxel_size
        self.num_workers = num_workers

        self.output_csv = output_csv
        if self.output_csv is None:
            self.output_csv = input_csv.replace(".csv", "_lines.csv")

        self.output_annotations_dir = output_annotations_dir
        self.compute_args = {}
        if self.num_workers == 1:
            self.compute_args = {"scheduler": "single-threaded"}

    @staticmethod
    def find_min_max_projected_points(points, line_point, line_direction):
        """Project ``points`` onto a line and return the two extreme projections.

        Args:
            points: ``(N, D)`` array of coordinates.
            line_point: A point on the line.
            line_direction: Direction of the line (normalized internally).

        Returns:
            ``(min_point, max_point)``: the projected points with the smallest
            and largest signed position along ``line_direction``.
        """
        unit_direction = line_direction / np.linalg.norm(line_direction)

        # Signed position of every point along the line, relative to line_point.
        projection_scalars = np.sum((points - line_point) * unit_direction, axis=1)
        projected_points = (
            line_point + projection_scalars[:, np.newaxis] * unit_direction
        )

        return (
            projected_points[np.argmin(projection_scalars)],
            projected_points[np.argmax(projection_scalars)],
        )

    @staticmethod
    def fit_line_to_points(points, voxel_size, offset, line_origin):
        """Fit a line to voxel coordinates and return its end points in physical units.

        The principal direction is computed from the points *after* scaling by
        ``voxel_size``, so the direction and the projected points share one
        coordinate system even when the voxels are anisotropic.

        Args:
            points: ``(N, 3)`` voxel indices.
            voxel_size: Per-axis voxel size.
            offset: Physical offset added to the scaled points.
            line_origin: Physical point the fitted line passes through.

        Returns:
            ``(start_point, end_point)``.  Which end is "start" follows the
            sign of the SVD direction, which is arbitrary.
        """
        physical_points = points * voxel_size + offset
        centered = physical_points - np.mean(physical_points, axis=0)
        # First right-singular vector = direction of largest variance.
        _, _, vv = np.linalg.svd(centered, full_matrices=False)
        line_direction = vv[0]

        # End points of the segment, e.g. for writing neuroglancer annotations.
        return FitLinesToSegmentations.find_min_max_projected_points(
            physical_points,
            line_origin,
            line_direction,
        )

    @staticmethod
    def fit_line_to_object(data, id, voxel_size, offset):
        """Fit a line to the largest 6-connected component of object ``id`` in ``data``.

        The line passes through the component's mean voxel position
        (scaled by ``voxel_size`` and shifted by ``offset``).

        Raises:
            ValueError: If ``id`` does not occur in ``data``.
        """
        # Only the largest connected component of the object is used.
        components = cc3d.connected_components(
            data == id, connectivity=6, binary_image=True
        )
        labels, counts = fastremap.unique(
            components[components > 0], return_counts=True
        )
        if len(labels) == 0:
            raise ValueError(f"Object ID {id} was not found in the provided data")
        largest_label = labels[np.argmax(counts)]

        points = np.column_stack(np.where(components == largest_label))
        com = np.mean(points, axis=0) * voxel_size + offset
        return FitLinesToSegmentations.fit_line_to_points(
            points, voxel_size, offset, com
        )

    def fit_lines_to_objects(self, df):
        """Fit a line to every object in ``df`` and return the augmented rows.

        Args:
            df: DataFrame (or dask partition) with ``Object ID`` and
                ``MIN/MAX Z/Y/X (nm)`` columns.

        Returns:
            DataFrame with the input rows plus ``Line Start/End Z/Y/X (nm)``.
            An empty input is returned as an empty copy.
        """
        per_object_frames = []
        for _, row in df.iterrows():
            object_id = row["Object ID"]
            box_min = np.array([row[f"MIN {d} (nm)"] for d in ["Z", "Y", "X"]])
            box_max = np.array([row[f"MAX {d} (nm)"] for d in ["Z", "Y", "X"]])
            # Pad by one voxel on every side so the ROI actually encompasses
            # the bounding box.
            roi = Roi(
                box_min - self.voxel_size, (box_max - box_min) + self.voxel_size * 2
            )
            data = self.segmentation_idi.to_ndarray_ts(roi)
            line_start, line_end = FitLinesToSegmentations.fit_line_to_object(
                data, object_id, self.voxel_size, roi.offset
            )

            result_df = pd.DataFrame([row])
            for point_string, point_coords in zip(
                ["Start", "End"], [line_start, line_end]
            ):
                for dim_idx, dim in enumerate(["Z", "Y", "X"]):
                    result_df[f"Line {point_string} {dim} (nm)"] = point_coords[dim_idx]
            per_object_frames.append(result_df)

        if not per_object_frames:
            # pd.concat raises on an empty list (e.g. an empty dask partition).
            return df.copy()

        return pd.concat(per_object_frames, ignore_index=True)

    def get_fit_lines_to_segmentations(self):
        """Fit lines for all objects with dask, then write the CSV and annotations."""
        # Create the output columns up front so the dask meta matches the result.
        for s_e in ["Start", "End"]:
            for dim in ["Z", "Y", "X"]:
                self.df[f"Line {s_e} {dim} (nm)"] = np.nan

        ddf = dd.from_pandas(
            self.df, npartitions=guesstimate_npartitions(self.df, self.num_workers)
        )
        meta = pd.DataFrame(columns=self.df.columns)
        ddf_out = ddf.map_partitions(self.fit_lines_to_objects, meta=meta)

        with start_dask(self.num_workers, "line fits", logger):
            with io_util.Timing_Messager("Fitting lines", logger):
                df = dask_computer(ddf_out, self.num_workers, **self.compute_args)

        df["Object ID"] = df["Object ID"].astype(int)
        df.to_csv(self.output_csv, index=False)

        if self.output_annotations_dir is not None:
            with io_util.Timing_Messager("Writing annotations", logger):
                cols = [f"Line Start {d} (nm)" for d in ["Z", "Y", "X"]] + [
                    f"Line End {d} (nm)" for d in ["Z", "Y", "X"]
                ]
                write_out_annotations(
                    self.output_annotations_dir,
                    df["Object ID"].values,
                    df[cols].to_numpy(),
                )
|