sainsc 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sainsc-0.1.0/Cargo.toml +23 -0
- sainsc-0.1.0/LICENSE +21 -0
- sainsc-0.1.0/MANIFEST.in +11 -0
- sainsc-0.1.0/PKG-INFO +115 -0
- sainsc-0.1.0/README.md +48 -0
- sainsc-0.1.0/pyproject.toml +100 -0
- sainsc-0.1.0/sainsc/__init__.py +18 -0
- sainsc-0.1.0/sainsc/_typealias.py +21 -0
- sainsc-0.1.0/sainsc/_utils.py +45 -0
- sainsc-0.1.0/sainsc/_utils_rust.pyi +267 -0
- sainsc-0.1.0/sainsc/datasets.py +56 -0
- sainsc-0.1.0/sainsc/io/__init__.py +3 -0
- sainsc-0.1.0/sainsc/io/_io.py +295 -0
- sainsc-0.1.0/sainsc/io/_io_utils.py +29 -0
- sainsc-0.1.0/sainsc/io/_stereoseq_chips.py +28 -0
- sainsc-0.1.0/sainsc/lazykde/_LazyKDE.py +1295 -0
- sainsc-0.1.0/sainsc/lazykde/__init__.py +4 -0
- sainsc-0.1.0/sainsc/lazykde/_kernel.py +95 -0
- sainsc-0.1.0/sainsc/lazykde/_utils.py +79 -0
- sainsc-0.1.0/sainsc/py.typed +0 -0
- sainsc-0.1.0/sainsc/utils/__init__.py +3 -0
- sainsc-0.1.0/sainsc/utils/_signatures.py +45 -0
- sainsc-0.1.0/sainsc.egg-info/PKG-INFO +115 -0
- sainsc-0.1.0/sainsc.egg-info/SOURCES.txt +34 -0
- sainsc-0.1.0/sainsc.egg-info/dependency_links.txt +1 -0
- sainsc-0.1.0/sainsc.egg-info/not-zip-safe +1 -0
- sainsc-0.1.0/sainsc.egg-info/requires.txt +27 -0
- sainsc-0.1.0/sainsc.egg-info/top_level.txt +1 -0
- sainsc-0.1.0/setup.cfg +4 -0
- sainsc-0.1.0/src/coordinates.rs +165 -0
- sainsc-0.1.0/src/cosine.rs +378 -0
- sainsc-0.1.0/src/gridcounts.rs +468 -0
- sainsc-0.1.0/src/lib.rs +26 -0
- sainsc-0.1.0/src/sparsearray_conversion.rs +117 -0
- sainsc-0.1.0/src/sparsekde.rs +302 -0
- sainsc-0.1.0/src/utils.rs +6 -0
sainsc-0.1.0/Cargo.toml
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[package]
|
|
2
|
+
name = "_utils_rust"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
edition = "2021"
|
|
5
|
+
|
|
6
|
+
[lib]
|
|
7
|
+
name = "_utils_rust"
|
|
8
|
+
crate-type = ["cdylib"]
|
|
9
|
+
|
|
10
|
+
[dependencies]
|
|
11
|
+
bincode = { version = "1.3" }
|
|
12
|
+
indexmap = { version = "2.1.0", features = ["rayon"] }
|
|
13
|
+
itertools = { version = "0.12.1" }
|
|
14
|
+
ndarray = { version = "0.15.6", features = ["rayon"] }
|
|
15
|
+
ndarray-stats = { version = "0.5.1" }
|
|
16
|
+
num = { version = "0.4.1" }
|
|
17
|
+
numpy = { version = "0.21.0" }
|
|
18
|
+
polars = { version = "0.41", features = ["partition_by", "dtype-categorical"] }
|
|
19
|
+
polars-arrow = { version = "0.41" }
|
|
20
|
+
pyo3 = { version = "0.21.0", features = ["extension-module"] }
|
|
21
|
+
pyo3-polars = { version = "0.15.0" }
|
|
22
|
+
rayon = { version = "1.8.0" }
|
|
23
|
+
sprs = { version = "0.11.1", features = ["serde"] }
|
sainsc-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Niklas Müller-Bötticher, Naveed Ishaque, Roland Eils, Berlin Institute of Health @ Charité
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
sainsc-0.1.0/MANIFEST.in
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
# setuptools-scm includes all tracked files by default in sdist
|
|
2
|
+
|
|
3
|
+
# exclude docs as it contains large files
|
|
4
|
+
prune docs
|
|
5
|
+
|
|
6
|
+
# exclude unnecessary configs
|
|
7
|
+
exclude .pre-commit-config.yaml .readthedocs.yaml .yamlfmt.yaml
|
|
8
|
+
prune .github
|
|
9
|
+
|
|
10
|
+
# exclude example data
|
|
11
|
+
prune data
|
sainsc-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: sainsc
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Segmentation-free Analysis of In Situ Capture data
|
|
5
|
+
Author-email: Niklas Müller-Bötticher <niklas.mueller-boetticher@charite.de>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024 Niklas Müller-Bötticher, Naveed Ishaque, Roland Eils, Berlin Institute of Health @ Charité
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
|
|
28
|
+
Project-URL: homepage, https://github.com/HiDiHlabs/sainsc
|
|
29
|
+
Project-URL: documentation, https://sainsc.readthedocs.io
|
|
30
|
+
Project-URL: repository, https://github.com/HiDiHlabs/sainsc
|
|
31
|
+
Classifier: Intended Audience :: Science/Research
|
|
32
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
33
|
+
Classifier: Operating System :: OS Independent
|
|
34
|
+
Classifier: Programming Language :: Python
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
37
|
+
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
38
|
+
Classifier: Programming Language :: Rust
|
|
39
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
40
|
+
Classifier: Typing :: Typed
|
|
41
|
+
Requires-Python: >=3.10
|
|
42
|
+
Description-Content-Type: text/markdown
|
|
43
|
+
License-File: LICENSE
|
|
44
|
+
Requires-Dist: anndata>=0.9
|
|
45
|
+
Requires-Dist: matplotlib
|
|
46
|
+
Requires-Dist: matplotlib-scalebar
|
|
47
|
+
Requires-Dist: numba>=0.44
|
|
48
|
+
Requires-Dist: numpy>=1.21
|
|
49
|
+
Requires-Dist: pandas
|
|
50
|
+
Requires-Dist: polars[pandas]>=1
|
|
51
|
+
Requires-Dist: scikit-image>=0.18
|
|
52
|
+
Requires-Dist: scipy>=1.9
|
|
53
|
+
Requires-Dist: seaborn>=0.11
|
|
54
|
+
Provides-Extra: spatialdata
|
|
55
|
+
Requires-Dist: spatialdata>=0.1; extra == "spatialdata"
|
|
56
|
+
Provides-Extra: docs
|
|
57
|
+
Requires-Dist: sphinx; extra == "docs"
|
|
58
|
+
Requires-Dist: sphinx-autoapi>=3.1; extra == "docs"
|
|
59
|
+
Requires-Dist: sphinx-copybutton; extra == "docs"
|
|
60
|
+
Requires-Dist: sphinx-rtd-theme; extra == "docs"
|
|
61
|
+
Requires-Dist: myst-nb; extra == "docs"
|
|
62
|
+
Provides-Extra: data
|
|
63
|
+
Requires-Dist: pooch>=1; extra == "data"
|
|
64
|
+
Provides-Extra: dev
|
|
65
|
+
Requires-Dist: sainsc[data,docs,spatialdata]; extra == "dev"
|
|
66
|
+
Requires-Dist: pre-commit; extra == "dev"
|
|
67
|
+
|
|
68
|
+
# sainsc
|
|
69
|
+
|
|
70
|
+
[](https://opensource.org/licenses/MIT)
|
|
71
|
+
[](https://github.com/psf/black)
|
|
72
|
+
[](https://github.com/astral-sh/ruff)
|
|
73
|
+
[](https://pycqa.github.io/isort/)
|
|
74
|
+
[](http://mypy-lang.org/)
|
|
75
|
+
[](https://github.com/pre-commit/pre-commit)
|
|
76
|
+
|
|
77
|
+
/ˈsaiəns/
|
|
78
|
+
|
|
79
|
+
_"**S**egmentation-free **A**nalysis of **In S**itu **C**apture data"_
|
|
80
|
+
or alternatively
|
|
81
|
+
"_**S**tupid **A**cronyms **In Sc**ience_"
|
|
82
|
+
|
|
83
|
+
`sainsc` is a segmentation-free analysis tool for spatial transcriptomics from in situ
|
|
84
|
+
capture technologies (but also works for imaging-based technologies). It is easily
|
|
85
|
+
integratable with the [scverse](https://github.com/scverse) (i.e. `scanpy` and `squidpy`)
|
|
86
|
+
by exporting data in [`AnnData`](https://anndata.readthedocs.io/) or
|
|
87
|
+
[`SpatialData`](https://spatialdata.scverse.org/) format.
|
|
88
|
+
|
|
89
|
+
## Installation
|
|
90
|
+
|
|
91
|
+
`sainsc` will be made available on [PyPI](https://pypi.org/) and
|
|
92
|
+
[bioconda](https://bioconda.github.io/). For detailed installation instructions
|
|
93
|
+
please refer to the [documentation](https://sainsc.readthedocs.io/en/stable/installation.html).
|
|
94
|
+
|
|
95
|
+
## Documentation
|
|
96
|
+
|
|
97
|
+
For an extensive documentation of the package please refer to the
|
|
98
|
+
[ReadTheDocs page](https://sainsc.readthedocs.io)
|
|
99
|
+
|
|
100
|
+
## Versioning
|
|
101
|
+
|
|
102
|
+
This project follows the [SemVer](https://semver.org/) guidelines for versioning.
|
|
103
|
+
|
|
104
|
+
## Citations
|
|
105
|
+
|
|
106
|
+
If you are using `sainsc` for your research please cite
|
|
107
|
+
|
|
108
|
+
Müller-Bötticher, N., Tiesmeyer, S., Eils, R., and Ishaque, N.
|
|
109
|
+
"Sainsc: a computational tool for segmentation-free analysis of in-situ capture"
|
|
110
|
+
bioRxiv (2024) https://doi.org/10.1101/2024.08.02.603879
|
|
111
|
+
|
|
112
|
+
## License
|
|
113
|
+
|
|
114
|
+
This project is licensed under the MIT License - for details please refer to the
|
|
115
|
+
[LICENSE](./LICENSE) file.
|
sainsc-0.1.0/README.md
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# sainsc
|
|
2
|
+
|
|
3
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
4
|
+
[Code style: black](https://github.com/psf/black)
|
|
5
|
+
[Ruff](https://github.com/astral-sh/ruff)
|
|
6
|
+
[Imports: isort](https://pycqa.github.io/isort/)
|
|
7
|
+
[Checked with mypy](http://mypy-lang.org/)
|
|
8
|
+
[pre-commit](https://github.com/pre-commit/pre-commit)
|
|
9
|
+
|
|
10
|
+
/ˈsaiəns/
|
|
11
|
+
|
|
12
|
+
_"**S**egmentation-free **A**nalysis of **In S**itu **C**apture data"_
|
|
13
|
+
or alternatively
|
|
14
|
+
"_**S**tupid **A**cronyms **In Sc**ience_"
|
|
15
|
+
|
|
16
|
+
`sainsc` is a segmentation-free analysis tool for spatial transcriptomics from in situ
|
|
17
|
+
capture technologies (but also works for imaging-based technologies). It is easily
|
|
18
|
+
integrated with the [scverse](https://github.com/scverse) ecosystem (e.g. `scanpy` and `squidpy`)
|
|
19
|
+
by exporting data in [`AnnData`](https://anndata.readthedocs.io/) or
|
|
20
|
+
[`SpatialData`](https://spatialdata.scverse.org/) format.
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
`sainsc` will be made available on [PyPI](https://pypi.org/) and
|
|
25
|
+
[bioconda](https://bioconda.github.io/). For detailed installation instructions
|
|
26
|
+
please refer to the [documentation](https://sainsc.readthedocs.io/en/stable/installation.html).
|
|
27
|
+
|
|
28
|
+
## Documentation
|
|
29
|
+
|
|
30
|
+
For extensive documentation of the package, please refer to the
|
|
31
|
+
[ReadTheDocs page](https://sainsc.readthedocs.io).
|
|
32
|
+
|
|
33
|
+
## Versioning
|
|
34
|
+
|
|
35
|
+
This project follows the [SemVer](https://semver.org/) guidelines for versioning.
|
|
36
|
+
|
|
37
|
+
## Citations
|
|
38
|
+
|
|
39
|
+
If you are using `sainsc` for your research, please cite:
|
|
40
|
+
|
|
41
|
+
Müller-Bötticher, N., Tiesmeyer, S., Eils, R., and Ishaque, N.
|
|
42
|
+
"Sainsc: a computational tool for segmentation-free analysis of in-situ capture"
|
|
43
|
+
bioRxiv (2024) https://doi.org/10.1101/2024.08.02.603879
|
|
44
|
+
|
|
45
|
+
## License
|
|
46
|
+
|
|
47
|
+
This project is licensed under the MIT License - for details please refer to the
|
|
48
|
+
[LICENSE](./LICENSE) file.
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=61.0.0", "setuptools_scm[toml]>=6.2", "setuptools-rust>=1.7"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
[project]
|
|
7
|
+
name = "sainsc"
|
|
8
|
+
description = "Segmentation-free Analysis of In Situ Capture data"
|
|
9
|
+
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
|
+
license = { file = "LICENSE" }
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
dynamic = ["version"]
|
|
13
|
+
|
|
14
|
+
authors = [
|
|
15
|
+
{ name = "Niklas Müller-Bötticher", email = "niklas.mueller-boetticher@charite.de" },
|
|
16
|
+
]
|
|
17
|
+
dependencies = [
|
|
18
|
+
"anndata>=0.9",
|
|
19
|
+
"matplotlib",
|
|
20
|
+
"matplotlib-scalebar",
|
|
21
|
+
"numba>=0.44",
|
|
22
|
+
"numpy>=1.21",
|
|
23
|
+
"pandas",
|
|
24
|
+
"polars[pandas]>=1",
|
|
25
|
+
"scikit-image>=0.18",
|
|
26
|
+
"scipy>=1.9",
|
|
27
|
+
"seaborn>=0.11",
|
|
28
|
+
]
|
|
29
|
+
classifiers = [
|
|
30
|
+
"Intended Audience :: Science/Research",
|
|
31
|
+
"License :: OSI Approved :: MIT License",
|
|
32
|
+
"Operating System :: OS Independent",
|
|
33
|
+
"Programming Language :: Python",
|
|
34
|
+
"Programming Language :: Python :: 3",
|
|
35
|
+
"Programming Language :: Python :: 3 :: Only",
|
|
36
|
+
"Programming Language :: Python :: Implementation :: CPython",
|
|
37
|
+
"Programming Language :: Rust",
|
|
38
|
+
"Topic :: Scientific/Engineering :: Bio-Informatics",
|
|
39
|
+
"Typing :: Typed",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.optional-dependencies]
|
|
43
|
+
spatialdata = ["spatialdata>=0.1"]
|
|
44
|
+
docs = [
|
|
45
|
+
"sphinx",
|
|
46
|
+
"sphinx-autoapi>=3.1",
|
|
47
|
+
"sphinx-copybutton",
|
|
48
|
+
"sphinx-rtd-theme",
|
|
49
|
+
"myst-nb",
|
|
50
|
+
]
|
|
51
|
+
data = ["pooch>=1"]
|
|
52
|
+
dev = ["sainsc[data,docs,spatialdata]", "pre-commit"]
|
|
53
|
+
|
|
54
|
+
[project.urls]
|
|
55
|
+
homepage = "https://github.com/HiDiHlabs/sainsc"
|
|
56
|
+
documentation = "https://sainsc.readthedocs.io"
|
|
57
|
+
repository = "https://github.com/HiDiHlabs/sainsc"
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
[tool]
|
|
61
|
+
|
|
62
|
+
[tool.setuptools]
|
|
63
|
+
zip-safe = false
|
|
64
|
+
|
|
65
|
+
[tool.setuptools.packages.find]
|
|
66
|
+
include = ["sainsc", "sainsc.io", "sainsc.lazykde", "sainsc.utils"]
|
|
67
|
+
|
|
68
|
+
[tool.setuptools_scm]
|
|
69
|
+
|
|
70
|
+
[[tool.setuptools-rust.ext-modules]]
|
|
71
|
+
target = "sainsc._utils_rust"
|
|
72
|
+
|
|
73
|
+
[tool.isort]
|
|
74
|
+
profile = "black"
|
|
75
|
+
|
|
76
|
+
[tool.black]
|
|
77
|
+
target-version = ["py310", "py311", "py312"]
|
|
78
|
+
|
|
79
|
+
[tool.ruff]
|
|
80
|
+
target-version = "py310"
|
|
81
|
+
|
|
82
|
+
[tool.mypy]
|
|
83
|
+
python_version = "3.10"
|
|
84
|
+
ignore_missing_imports = true
|
|
85
|
+
warn_no_return = false
|
|
86
|
+
packages = "sainsc"
|
|
87
|
+
plugins = "numpy.typing.mypy_plugin"
|
|
88
|
+
|
|
89
|
+
[tool.codespell]
|
|
90
|
+
ignore-words-list = "coo,crate"
|
|
91
|
+
|
|
92
|
+
[tool.cibuildwheel]
|
|
93
|
+
archs = 'auto64'
|
|
94
|
+
# build = 'cp310-*'
|
|
95
|
+
skip = "pp*" # skip PyPy
|
|
96
|
+
|
|
97
|
+
[tool.cibuildwheel.linux]
|
|
98
|
+
# cibuildwheel runs linux in containers so we need to install rust there
|
|
99
|
+
before-all = "curl -sSf https://sh.rustup.rs | sh -s -- --default-toolchain stable -y"
|
|
100
|
+
environment = { PATH = "$PATH:$HOME/.cargo/bin" }
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
__version__ = version("sainsc")
|
|
5
|
+
except PackageNotFoundError:
|
|
6
|
+
__version__ = "unknown version"
|
|
7
|
+
|
|
8
|
+
from ._utils_rust import GridCounts
|
|
9
|
+
from .io import read_StereoSeq, read_StereoSeq_bins
|
|
10
|
+
from .lazykde import LazyKDE, gaussian_kernel
|
|
11
|
+
|
|
12
|
+
__all__ = [
|
|
13
|
+
"GridCounts",
|
|
14
|
+
"LazyKDE",
|
|
15
|
+
"gaussian_kernel",
|
|
16
|
+
"read_StereoSeq",
|
|
17
|
+
"read_StereoSeq_bins",
|
|
18
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import TypeAlias
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from numpy.typing import NDArray
|
|
6
|
+
from scipy.sparse import csc_array, csc_matrix, csr_array, csr_matrix
|
|
7
|
+
|
|
8
|
+
_PathLike: TypeAlias = os.PathLike[str] | str
|
|
9
|
+
|
|
10
|
+
_Csr: TypeAlias = csr_array | csr_matrix
|
|
11
|
+
_Csc: TypeAlias = csc_array | csc_matrix
|
|
12
|
+
_Csx: TypeAlias = _Csr | _Csc
|
|
13
|
+
_CsxArray: TypeAlias = csc_array | csr_array
|
|
14
|
+
|
|
15
|
+
_RangeTuple: TypeAlias = tuple[int, int]
|
|
16
|
+
_RangeTuple2D: TypeAlias = tuple[_RangeTuple, _RangeTuple]
|
|
17
|
+
|
|
18
|
+
_Local_Max: TypeAlias = tuple[NDArray[np.int_], NDArray[np.int_]]
|
|
19
|
+
|
|
20
|
+
_Color: TypeAlias = str | tuple[float, float, float]
|
|
21
|
+
_Cmap: TypeAlias = str | list[_Color] | dict[str, _Color]
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from typing import NoReturn
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pandas as pd
|
|
6
|
+
from numpy.typing import NDArray
|
|
7
|
+
|
|
8
|
+
from ._utils_rust import coordinate_as_string
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _get_n_cpus() -> int:
|
|
12
|
+
return len(os.sched_getaffinity(0))
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _get_coordinate_index(
    x: NDArray[np.integer],
    y: NDArray[np.integer],
    *,
    name: str | None = None,
    n_threads: int = 1,
) -> pd.Index:
    """
    Build a string-typed pandas index from paired integer coordinates.

    Parameters
    ----------
    x : numpy.ndarray[numpy.integer]
        x-coordinates.
    y : numpy.ndarray[numpy.integer]
        y-coordinates.
    name : str, optional
        Name given to the resulting index.
    n_threads : int, optional
        Number of threads passed on to `coordinate_as_string`.

    Returns
    -------
    pandas.Index
        Index created with ``dtype=str`` from the labels produced by
        `coordinate_as_string`.
    """
    # `coordinate_as_string` is typed for int32 input; `copy=False` skips the
    # copy when the arrays already have that dtype.
    x_i32: NDArray[np.int32] = x.astype(np.int32, copy=False)
    y_i32: NDArray[np.int32] = y.astype(np.int32, copy=False)

    labels = coordinate_as_string(x_i32, y_i32, n_threads=n_threads)
    return pd.Index(labels, dtype=str, name=name)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _bin_coordinates(df: pd.DataFrame, bin_size: float) -> pd.DataFrame:
    """
    Return a copy of `df` whose 'x' and 'y' columns hold bin indices.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an 'x' and a 'y' column.
    bin_size : float
        Edge length of a bin, in the same unit as the coordinates.

    Returns
    -------
    pandas.DataFrame
        New frame (the input is not modified) with binned 'x' and 'y'.
    """
    binned_x = _get_bin_coordinate(df["x"].to_numpy(), bin_size)
    binned_y = _get_bin_coordinate(df["y"].to_numpy(), bin_size)
    # `assign` returns a new frame, leaving the caller's frame untouched.
    return df.assign(x=binned_x, y=binned_y)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _get_bin_coordinate(coor: NDArray[np.number], bin_size: float) -> NDArray[np.int32]:
|
|
39
|
+
return np.floor(coor / bin_size).astype(np.int32, copy=False)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _raise_module_load_error(e: Exception, fn: str, pkg: str, extra: str) -> NoReturn:
|
|
43
|
+
raise ModuleNotFoundError(
|
|
44
|
+
f"`{fn}` requires '{pkg}' to be installed, e.g. via the '{extra}' extra."
|
|
45
|
+
) from e
|
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from numpy.typing import NDArray
|
|
3
|
+
from polars import DataFrame
|
|
4
|
+
|
|
5
|
+
from ._typealias import _Csx, _CsxArray
|
|
6
|
+
|
|
7
|
+
def sparse_kde_csx_py(
    counts: _Csx, kernel: NDArray[np.float32], *, threshold: float = 0
) -> _CsxArray:
    """
    Calculate the KDE for each spot with counts as uint16.

    Parameters
    ----------
    counts : scipy.sparse.csr_array | scipy.sparse.csr_matrix | scipy.sparse.csc_array | scipy.sparse.csc_matrix
        Sparse counts.
    kernel : numpy.ndarray[numpy.float32]
        Kernel used for the KDE.
    threshold : float, optional
        Threshold passed to the KDE computation.

    Returns
    -------
    scipy.sparse.csc_array | scipy.sparse.csr_array
    """
|
|
13
|
+
|
|
14
|
+
def kde_at_coord(
    counts: GridCounts,
    genes: list[str],
    kernel: NDArray[np.float32],
    coordinates: tuple[NDArray[np.int_], NDArray[np.int_]],
    *,
    n_threads: int | None = None,
) -> _CsxArray:
    """
    Calculate KDE at the given coordinates.

    Parameters
    ----------
    counts : GridCounts
        Gene counts.
    genes : list[str]
        Names of genes.
    kernel : numpy.ndarray[numpy.float32]
        Kernel used for the KDE.
    coordinates : tuple[numpy.ndarray[numpy.int], numpy.ndarray[numpy.int]]
        Pair of integer arrays giving the coordinates to evaluate.
    n_threads : int, optional
        Number of threads to use.

    Returns
    -------
    scipy.sparse.csc_array | scipy.sparse.csr_array
    """
|
|
25
|
+
|
|
26
|
+
def categorical_coordinate(
|
|
27
|
+
x: NDArray[np.int32], y: NDArray[np.int32], *, n_threads: int | None = None
|
|
28
|
+
) -> tuple[NDArray[np.int32], NDArray[np.int32]]:
|
|
29
|
+
"""
|
|
30
|
+
Get the codes and the coordinates (comparable to a pandas.Categorical)
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
def coordinate_as_string(
|
|
34
|
+
x: NDArray[np.int32], y: NDArray[np.int32], *, n_threads: int | None = None
|
|
35
|
+
) -> NDArray[np.str_]:
|
|
36
|
+
"""
|
|
37
|
+
Concatenate two int arrays elementwise into a string representation (i.e. 'x_y').
|
|
38
|
+
"""
|
|
39
|
+
|
|
40
|
+
# NOTE(review): the meaning of the two float32 arrays in the returned tuple is
# not visible from this stub -- confirm against the Rust implementation.
def cosinef32_and_celltypei8(
    counts: GridCounts,
    genes: list[str],
    signatures: NDArray[np.float32],
    kernel: NDArray[np.float32],
    *,
    log: bool = False,
    chunk_size: tuple[int, int] = (500, 500),
    n_threads: int | None = None,
) -> tuple[NDArray[np.float32], NDArray[np.float32], NDArray[np.int8]]:
    """
    Calculate the cosine similarity given counts and signatures and assign the most
    similar celltype.

    Parameters
    ----------
    counts : GridCounts
        Gene counts.
    genes : list[str]
        Names of genes.
    signatures : numpy.ndarray[numpy.float32]
        Cell-type signatures.
    kernel : numpy.ndarray[numpy.float32]
        Kernel array.
    log : bool, optional
        Flag passed to the computation; defaults to `False`.
    chunk_size : tuple[int, int], optional
        Chunk size; defaults to ``(500, 500)``.
    n_threads : int, optional
        Number of threads to use.

    Returns
    -------
    tuple[numpy.ndarray[numpy.float32], numpy.ndarray[numpy.float32], numpy.ndarray[numpy.int8]]
    """
|
|
54
|
+
|
|
55
|
+
# NOTE(review): the meaning of the two float32 arrays in the returned tuple is
# not visible from this stub -- confirm against the Rust implementation.
def cosinef32_and_celltypei16(
    counts: GridCounts,
    genes: list[str],
    signatures: NDArray[np.float32],
    kernel: NDArray[np.float32],
    *,
    log: bool = False,
    chunk_size: tuple[int, int] = (500, 500),
    n_threads: int | None = None,
) -> tuple[NDArray[np.float32], NDArray[np.float32], NDArray[np.int16]]:
    """
    Calculate the cosine similarity given counts and signatures and assign the most
    similar celltype.

    Parameters
    ----------
    counts : GridCounts
        Gene counts.
    genes : list[str]
        Names of genes.
    signatures : numpy.ndarray[numpy.float32]
        Cell-type signatures.
    kernel : numpy.ndarray[numpy.float32]
        Kernel array.
    log : bool, optional
        Flag passed to the computation; defaults to `False`.
    chunk_size : tuple[int, int], optional
        Chunk size; defaults to ``(500, 500)``.
    n_threads : int, optional
        Number of threads to use.

    Returns
    -------
    tuple[numpy.ndarray[numpy.float32], numpy.ndarray[numpy.float32], numpy.ndarray[numpy.int16]]
    """
|
|
69
|
+
|
|
70
|
+
class GridCounts:
    """
    Container holding the count data of each gene in a sparse 2D-grid.
    """

    shape: tuple[int, int]
    """
    tuple[int, int]: Shape of the count arrays.
    """

    def __init__(
        self,
        counts: dict[str, _Csx],
        *,
        resolution: float | None = None,
        n_threads: int | None = None,
    ):
        """
        Parameters
        ----------
        counts : dict[str, scipy.sparse.csr_array | scipy.sparse.csr_matrix | scipy.sparse.csc_array | scipy.sparse.csc_matrix]
            Gene counts, keyed by gene name.
        resolution : float, optional
            Resolution as nm / pixel.
        n_threads : int, optional
            Number of threads used for processing. If `None` this will
            default to the number of logical CPUs.

        Raises
        ------
        ValueError
            If genes in `counts` do not all have the same shape.
        """

    # NOTE(review): the previous docstring stated a default `resolution` of
    # 1,000 (i.e. um) while the signature default is None -- confirm.
    @classmethod
    def from_dataframe(
        cls,
        df: DataFrame,
        *,
        resolution: float | None = None,
        binsize: float | None = None,
        n_threads: int | None = None,
    ):  # -> Self
        """
        Initialize from dataframe.

        Transform a :py:class:`polars.DataFrame` that provides a 'gene', 'x', and 'y'
        column into :py:class:`sainsc.GridCounts`. If a 'count' column exists it will
        be used as counts else a count of 1 (single molecule) per row will be assumed.

        Parameters
        ----------
        df : polars.DataFrame
            The data to be transformed.
        resolution : float, optional
            Resolution of each coordinate unit in nm.
        binsize : float or None, optional
            The size to bin the coordinates by. If None coordinates must be integers.
        n_threads : int, optional
            Number of threads used for processing.
            If `None` this will default to the number of logical CPUs.

        Returns
        -------
        sainsc.GridCounts
        """

    def as_dataframe(self) -> DataFrame:
        """
        Convert to a dataframe with 'gene', 'x', 'y', and 'count' column.

        Returns
        -------
        polars.DataFrame
        """

    def __getitem__(self, key: str) -> _CsxArray: ...
    def __setitem__(self, key: str, value: _Csx): ...
    def __delitem__(self, key: str): ...
    def __len__(self) -> int: ...
    def __contains__(self, item: str) -> bool: ...
    def __eq__(self, other) -> bool: ...
    def __ne__(self, other) -> bool: ...
    # NOTE(review): `default` is presumably returned when `key` is absent
    # (mirroring `dict.get`) -- confirm against the Rust implementation.
    def get(self, key: str, default: _CsxArray | None = None) -> _CsxArray | None:
        """
        Get the counts for a gene.

        Parameters
        ----------
        key : str
            Name of the gene to retrieve.
        default : scipy.sparse.csr_array | scipy.sparse.csc_array | None, optional
            Fallback value.

        Returns
        -------
        scipy.sparse.csr_array | scipy.sparse.csc_array | None
        """

    def genes(self) -> list[str]:
        """
        Get all available genes.

        Returns
        -------
        list[str]
        """

    def gene_counts(self) -> dict[str, int]:
        """
        Number of counts per gene.

        Returns
        -------
        dict[str, int]
            Mapping from gene to number of counts.
        """

    def grid_counts(self) -> NDArray[np.uintc]:
        """
        Counts per pixel.

        Aggregates counts across all genes.

        Returns
        -------
        numpy.ndarray[numpy.uintc]
        """

    def select_genes(self, genes: set[str]):
        """
        Keep selected genes.

        Parameters
        ----------
        genes : set[str]
            Set of gene names to keep.
        """

    def filter_genes_by_count(self, min: int = 1, max: int = 4_294_967_295):
        """
        Filter genes by minimum and maximum count thresholds.

        Parameters
        ----------
        min : int, optional
            Minimum count threshold.
        max : int, optional
            Maximum count threshold. The default equals ``2**32 - 1``.
        """

    def crop(self, x: tuple[int | None, int | None], y: tuple[int | None, int | None]):
        """
        Crop the field of view for all genes.

        Parameters
        ----------
        x : tuple[int | None, int | None]
            Range to crop as `(xmin, xmax)`
        y : tuple[int | None, int | None]
            Range to crop as `(ymin, ymax)`
        """

    def filter_mask(self, mask: NDArray[np.bool_]):
        """
        Filter all genes with a binary mask.

        Parameters
        ----------
        mask : numpy.ndarray[numpy.bool]
            All counts where `mask` is `False` will be set to 0.
        """

    @property
    def resolution(self) -> float | None:
        """
        float | None: Resolution in nm / pixel.

        Raises
        ------
        TypeError
            If setting with a type other than `float` or `int`.
        """

    @resolution.setter
    def resolution(self, resolution: float): ...
    @property
    def n_threads(self) -> int:
        """
        int: Number of threads used for processing.

        Raises
        ------
        TypeError
            If setting with a type other than `int` or less than 0.
        """

    @n_threads.setter
    def n_threads(self, n_threads: int): ...
|