cellsweep 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,101 @@
1
+ Metadata-Version: 2.4
2
+ Name: cellsweep
3
+ Version: 0.1.0
4
+ Summary: Denoising scRNA-seq in a smart and efficient way.
5
+ Author-email: Maya Caskey <mcaskey@caltech.edu>, Joseph Rich <jmrich@caltech.edu>
6
+ Maintainer-email: Maya Caskey <mcaskey@caltech.edu>, Joseph Rich <jmrich@caltech.edu>
7
+ Project-URL: Homepage, https://github.com/pachterlab/cellsweep
8
+ Keywords: bioinformatics,machine learning,single-cell,scRNA-seq,denoising,data-analysis
9
+ Classifier: Environment :: Console
10
+ Classifier: Framework :: Jupyter
11
+ Classifier: Intended Audience :: Science/Research
12
+ Classifier: License :: OSI Approved :: BSD License
13
+ Classifier: Operating System :: OS Independent
14
+ Classifier: Programming Language :: Python :: 3.9
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
19
+ Classifier: Topic :: Utilities
20
+ Requires-Python: >=3.9
21
+ Description-Content-Type: text/markdown
22
+ Requires-Dist: numpy
23
+ Requires-Dist: numba>=0.56.2
24
+ Requires-Dist: pandas
25
+ Requires-Dist: scipy
26
+ Requires-Dist: anndata
27
+ Requires-Dist: pydantic<3.0,>=2.5
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
30
+ Requires-Dist: black[jupyter]>=22.0.0; extra == "dev"
31
+ Requires-Dist: isort>=6.0.0; extra == "dev"
32
+ Requires-Dist: pympler>=1.1; extra == "dev"
33
+ Provides-Extra: analysis
34
+ Requires-Dist: nbval>=0.10.0; extra == "analysis"
35
+ Requires-Dist: nbdime>=4.0.2; extra == "analysis"
36
+ Requires-Dist: ipython>=8.0.0; extra == "analysis"
37
+ Requires-Dist: upsetplot; extra == "analysis"
38
+ Requires-Dist: PyYAML; extra == "analysis"
39
+ Requires-Dist: scanpy; extra == "analysis"
40
+ Requires-Dist: seaborn; extra == "analysis"
41
+ Requires-Dist: matplotlib; extra == "analysis"
42
+ Requires-Dist: celltypist; extra == "analysis"
43
+ Requires-Dist: mpl-scatter-density; extra == "analysis"
44
+ Requires-Dist: astropy; extra == "analysis"
45
+ Requires-Dist: scikit-learn; extra == "analysis"
46
+ Requires-Dist: ipywidgets; extra == "analysis"
47
+ Requires-Dist: torch; extra == "analysis"
48
+ Requires-Dist: scikit-misc; extra == "analysis"
49
+ Requires-Dist: adjustText; extra == "analysis"
50
+ Requires-Dist: squidpy; extra == "analysis"
51
+ Requires-Dist: tqdm; extra == "analysis"
52
+
53
+ # cellsweep
54
+
55
+ Sweep out noisy counts from single-cell RNA-seq data with CellSweep!
56
+
57
+ ![alt text](https://github.com/pachterlab/cellsweep/blob/main/figures/logo.png?raw=true)
58
+
59
+ ## Install
60
+ ### Basic use
61
+ ```
62
+ pip install cellsweep
63
+ ```
64
+
65
+ ### To run notebooks:
66
+ ```
67
+ pip install cellsweep[analysis]
68
+ ```
69
+
70
+ ### To remake figures from the paper:
71
+ ```
72
+ git clone https://github.com/pachterlab/cellsweep.git
73
+ cd cellsweep
74
+ conda env create -f environment.yml
75
+ pip install cellsweep[analysis]==0.1.0
76
+ ```
77
+
78
+ ## Quickstart
79
+ CellSweep's main entry point is a single function, `denoise_count_matrix`, which takes a raw count matrix in an AnnData object and produces a denoised count matrix in another AnnData object. See a simple, fully worked example in the `notebooks/intro.ipynb` Jupyter Notebook.
80
+
81
+ ### Python API
82
+ ```python
83
+ import cellsweep
84
+ adata_cellsweep = cellsweep.denoise_count_matrix(adata_raw_path, adata_out=adata_cellsweep_path) # assumes that adata_raw_path is an h5ad file or AnnData object with a column adata.obs['celltype'] indicating celltype
85
+
86
+ # for help
87
+ help(cellsweep.denoise_count_matrix)
88
+ ```
89
+
90
+ ### Command line interface
91
+ ```
92
+ cellsweep denoise_count_matrix -o adata_cellsweep.h5ad adata_raw.h5ad # assumes that adata_raw.h5ad is an h5ad file with a column adata.obs['celltype'] indicating celltype
93
+
94
+ # for help
95
+ cellsweep denoise_count_matrix --help
96
+ ```
97
+
98
+ There are many utility functions in the `cellsweep.utils` module for data processing, plotting, and analysis. See examples in our Jupyter Notebooks.
99
+
100
+ ## Tutorials
101
+ We have several Jupyter Notebooks demonstrating the use of CellSweep for denoising count matrices and analyzing the results. See the `notebooks` folder in the repository.
@@ -0,0 +1,49 @@
1
+ # cellsweep
2
+
3
+ Sweep out noisy counts from single-cell RNA-seq data with CellSweep!
4
+
5
+ ![alt text](https://github.com/pachterlab/cellsweep/blob/main/figures/logo.png?raw=true)
6
+
7
+ ## Install
8
+ ### Basic use
9
+ ```
10
+ pip install cellsweep
11
+ ```
12
+
13
+ ### To run notebooks:
14
+ ```
15
+ pip install cellsweep[analysis]
16
+ ```
17
+
18
+ ### To remake figures from the paper:
19
+ ```
20
+ git clone https://github.com/pachterlab/cellsweep.git
21
+ cd cellsweep
22
+ conda env create -f environment.yml
23
+ pip install cellsweep[analysis]==0.1.0
24
+ ```
25
+
26
+ ## Quickstart
27
+ CellSweep's main entry point is a single function, `denoise_count_matrix`, which takes a raw count matrix in an AnnData object and produces a denoised count matrix in another AnnData object. See a simple, fully worked example in the `notebooks/intro.ipynb` Jupyter Notebook.
28
+
29
+ ### Python API
30
+ ```python
31
+ import cellsweep
32
+ adata_cellsweep = cellsweep.denoise_count_matrix(adata_raw_path, adata_out=adata_cellsweep_path) # assumes that adata_raw_path is an h5ad file or AnnData object with a column adata.obs['celltype'] indicating celltype
33
+
34
+ # for help
35
+ help(cellsweep.denoise_count_matrix)
36
+ ```
37
+
38
+ ### Command line interface
39
+ ```
40
+ cellsweep denoise_count_matrix -o adata_cellsweep.h5ad adata_raw.h5ad # assumes that adata_raw.h5ad is an h5ad file with a column adata.obs['celltype'] indicating celltype
41
+
42
+ # for help
43
+ cellsweep denoise_count_matrix --help
44
+ ```
45
+
46
+ There are many utility functions in the `cellsweep.utils` module for data processing, plotting, and analysis. See examples in our Jupyter Notebooks.
47
+
48
+ ## Tutorials
49
+ We have several Jupyter Notebooks demonstrating the use of CellSweep for denoising count matrices and analyzing the results. See the `notebooks` folder in the repository.
@@ -0,0 +1,8 @@
1
+ """cellsweep package initialization module."""
2
+
3
+ from .model import denoise_count_matrix
4
+ # from .utils import * # only imports what is in __all__ in .utils/__init__.py
5
+
6
+ __version__ = "0.1.0"
7
+ __author__ = "Joseph Rich"
8
+ __email__ = "josephrich98@gmail.com"
@@ -0,0 +1,230 @@
1
+ """cellsweep constant values."""
2
+
3
+ CellBender_Fig2_to_Immune_All_High_celltype_mapping = {
4
+ "Monocytes/neutrophils": [
5
+ "Monocytes", "Mono-mac", "Monocyte precursor", "Macrophages", "Granulocytes"
6
+ ],
7
+
8
+ "Monocytes/pDCs": [
9
+ "DC", "DC precursor", "pDC", "pDC precursor", "MNP"
10
+ ],
11
+
12
+ "T": [
13
+ "T cells", "Double-negative thymocytes", "Double-positive thymocytes", "ETP"
14
+ ],
15
+
16
+ "B": [
17
+ "B cells", "B-cell lineage", "Plasma cells"
18
+ ],
19
+
20
+ "NK": [
21
+ "ILC", "ILC precursor" # ILCs include NK-like subsets
22
+ ],
23
+
24
+ "Progenitor": [
25
+ "HSC/MPP", "Early MK", "Megakaryocyte precursor"
26
+ ],
27
+
28
+ "Baso./neutro./progenitor": [
29
+ "Promyelocytes", "Myelocytes"
30
+ ],
31
+
32
+ }
33
+
34
+
35
+ # Broad-to-fine mapping
36
+ CellBender_Fig2_to_Immune_All_Low_celltype_mapping = {
37
+ "Monocytes/neutrophils": [
38
+ "Classical monocytes", "Non-classical monocytes", "Monocytes",
39
+ "Intermediate macrophages", "Intestinal macrophages", "Macrophages",
40
+ "Kupffer cells", "Kidney-resident macrophages", "Erythrophagocytic macrophages",
41
+ "Neutrophils", "Granulocytes", "Mono-mac", "Monocyte precursor"
42
+ ],
43
+
44
+ "Monocytes/pDCs": [
45
+ "pDC", "pDC precursor", "DC", "DC1", "DC2", "DC3",
46
+ "Transitional DC", "Migratory DCs", "Cycling DCs", "DC precursor"
47
+ ],
48
+
49
+ "TrueT CD4+ naive/Treg": [
50
+ "Tcm/Naive helper T cells", "Type 1 helper T cells", "Type 17 helper T cells",
51
+ "Regulatory T cells", "Treg(diff)", "Follicular helper T cells"
52
+ ],
53
+
54
+ "B": [
55
+ "B cells", "Cycling B cells", "Transitional B cells", "Age-associated B cells"
56
+ ],
57
+
58
+ "B naive": [
59
+ "Naive B cells", "Pre-pro-B cells", "Pro-B cells", "Small pre-B cells", "Large pre-B cells"
60
+ ],
61
+
62
+ "B memory": [
63
+ "Memory B cells", "Germinal center B cells", "Proliferative germinal center B cells"
64
+ ],
65
+
66
+ "T CD8+": [
67
+ "CD8a/a", "CD8a/b(entry)"
68
+ ],
69
+
70
+ "T cytotoxic": [
71
+ "Tem/Temra cytotoxic T cells", "Tem/Trm cytotoxic T cells",
72
+ "Trm cytotoxic T cells", "Tcm/Naive cytotoxic T cells",
73
+ "Memory CD4+ cytotoxic T cells"
74
+ ],
75
+
76
+ "T gd": [
77
+ "gamma-delta T cells", "CRTAM+ gamma-delta T cells", "Cycling gamma-delta T cells"
78
+ ],
79
+
80
+ "MAIT": [
81
+ "MAIT cells"
82
+ ],
83
+
84
+ "NK": [
85
+ "NK cells", "CD16+ NK cells", "CD16- NK cells",
86
+ "Cycling NK cells", "Transitional NK"
87
+ ],
88
+
89
+ "Monocyte NC/I": [
90
+ "Non-classical monocytes", "Intermediate macrophages"
91
+ ],
92
+
93
+ "Progenitor": [
94
+ "HSC/MPP", "CMP", "GMP", "MEMP", "ELP", "ETP",
95
+ "Early lymphoid/T lymphoid", "Early MK", "Megakaryocyte precursor",
96
+ "Megakaryocyte-erythroid-mast cell progenitor"
97
+ ],
98
+
99
+ "Baso./neutro./progenitor": [
100
+ "Promyelocytes", "Myelocytes", "Neutrophil-myeloid progenitor"
101
+ ],
102
+
103
+ "pDCs": [
104
+ "pDC", "pDC precursor"
105
+ ]
106
+ }
107
+
108
+
109
+ CellTypistHigh_to_ImmuneMajor = {
110
+ "Monocytes": "Monocytes",
111
+ "Mono-mac": "Monocytes",
112
+ "Monocyte precursor": "Macrophages",
113
+ "Macrophages": "Macrophages",
114
+ "Granulocytes": "Neutrophils",
115
+ "DC": "DC",
116
+ "DC precursor": "DC",
117
+ "pDC": "DC",
118
+ "pDC precursor": "DC",
119
+ "MNP": "Neutrophils",
120
+ "B cells": "B cells",
121
+ "B-cell lineage": "B cells",
122
+ "Plasma cells": "B cells",
123
+ "T cells": "CD4 T cells",
124
+ "Double-negative thymocytes": "CD4 T cells",
125
+ "Double-positive thymocytes": "CD4 T cells",
126
+ "ETP": "CD4 T cells",
127
+ }
128
+
129
+ CellTypistLow_to_ImmuneMajor = {
130
+
131
+ # ---- Monocytes/neutrophils ----
132
+ "Classical monocytes": "Monocytes",
133
+ "Non-classical monocytes": "Monocytes",
134
+ "Monocytes": "Monocytes",
135
+ "Monocyte precursor": "Monocytes",
136
+ "Mono-mac": "Monocytes",
137
+
138
+ "Intermediate macrophages": "Macrophages",
139
+ "Intestinal macrophages": "Macrophages",
140
+ "Macrophages": "Macrophages",
141
+ "Kupffer cells": "Macrophages",
142
+ "Kidney-resident macrophages": "Macrophages",
143
+ "Erythrophagocytic macrophages": "Macrophages",
144
+
145
+ "Neutrophils": "Neutrophils",
146
+ "Granulocytes": "Neutrophils",
147
+
148
+ # ---- Monocytes/pDCs ----
149
+ "pDC": "DC",
150
+ "pDC precursor": "DC",
151
+ "DC": "DC",
152
+ "DC1": "DC",
153
+ "DC2": "DC",
154
+ "DC3": "DC",
155
+ "Transitional DC": "DC",
156
+ "Migratory DCs": "DC",
157
+ "Cycling DCs": "DC",
158
+ "DC precursor": "DC",
159
+
160
+ # ---- CD4 T ----
161
+ "Tcm/Naive helper T cells": "CD4 T cells",
162
+ "Type 1 helper T cells": "CD4 T cells",
163
+ "Type 17 helper T cells": "CD4 T cells",
164
+ "Regulatory T cells": "CD4 T cells",
165
+ "Treg(diff)": "CD4 T cells",
166
+ "Follicular helper T cells": "CD4 T cells",
167
+
168
+ # ---- B ----
169
+ "B cells": "B cells",
170
+ "Cycling B cells": "B cells",
171
+ "Transitional B cells": "B cells",
172
+ "Age-associated B cells": "B cells",
173
+ "Naive B cells": "B cells",
174
+ "Pre-pro-B cells": "B cells",
175
+ "Pro-B cells": "B cells",
176
+ "Small pre-B cells": "B cells",
177
+ "Large pre-B cells": "B cells",
178
+ "Memory B cells": "B cells",
179
+ "Germinal center B cells": "B cells",
180
+ "Proliferative germinal center B cells": "B cells",
181
+
182
+ # ---- CD8 ----
183
+ "CD8a/a": "CD8 T cells",
184
+ "CD8a/b(entry)": "CD8 T cells",
185
+ "Tem/Temra cytotoxic T cells": "CD8 T cells",
186
+ "Tem/Trm cytotoxic T cells": "CD8 T cells",
187
+ "Trm cytotoxic T cells": "CD8 T cells",
188
+ "Tcm/Naive cytotoxic T cells": "CD8 T cells",
189
+ "Memory CD4+ cytotoxic T cells": "CD8 T cells",
190
+ "gamma-delta T cells": "CD8 T cells",
191
+ "CRTAM+ gamma-delta T cells": "CD8 T cells",
192
+ "Cycling gamma-delta T cells": "CD8 T cells",
193
+ "MAIT cells": "CD8 T cells",
194
+
195
+ # ---- NK ----
196
+ "NK cells": "NK cells",
197
+ "CD16+ NK cells": "NK cells",
198
+ "CD16- NK cells": "NK cells",
199
+ "Cycling NK cells": "NK cells",
200
+ "Transitional NK": "NK cells",
201
+
202
+ # ---- Progenitor ----
203
+ "HSC/MPP": "Monocytes", # default major category for HSC/MPP if forced into one bucket
204
+ "CMP": "Monocytes",
205
+ "GMP": "Neutrophils",
206
+ "MEMP": "Eosinophils",
207
+ "ELP": "B cells",
208
+ "ETP": "CD4 T cells",
209
+ "Early lymphoid/T lymphoid": "CD4 T cells",
210
+ "Early MK": "Monocytes",
211
+ "Megakaryocyte precursor": "Monocytes",
212
+ "Megakaryocyte-erythroid-mast cell progenitor": "Eosinophils",
213
+
214
+ # ---- Baso/neutro/progenitor ----
215
+ "Promyelocytes": "Neutrophils",
216
+ "Myelocytes": "Neutrophils",
217
+ "Neutrophil-myeloid progenitor": "Neutrophils",
218
+ }
219
+
220
+ immune_markers = {
221
+ "Monocytes": ["CD14", "LYZ", "FCGR3A", "MS4A7"],
222
+ "Macrophages": ["CD68", "CD163", "C1QA", "C1QB", "C1QC"],
223
+ "DC": ["CLEC9A", "XCR1", "CD1C", "FCER1A", "IL3RA", "TCF4", "ITGAX", "CST3"],
224
+ "Neutrophils": ["S100A8", "S100A9", "MPO", "FCGR3B", "ELANE"],
225
+ "Eosinophils": ["CLC", "RNASE2", "RNASE3", "PRG2"],
226
+ "CD8 T cells": ["CD8A", "CD8B", "GZMB", "CD3E"],
227
+ "CD4 T cells": ["CD4", "CCR7", "IL7R", "TCF7", "CD3E"],
228
+ "NK cells": ["NKG7", "GNLY", "PRF1", "KLRD1", "GZMB"],
229
+ "B cells": ["MS4A1", "CD79A", "CD79B", "HLA-DRA", "CD19"]
230
+ }
@@ -0,0 +1,287 @@
1
+ """main function for argparse."""
2
+
3
+ import argparse
4
+ import sys
5
+ from .__init__ import __version__
6
+ from .model import denoise_count_matrix
7
+
8
+ # Custom formatter for help messages that preserved the text formatting and adds the default value to the end of the help message
9
+ class CustomHelpFormatter(argparse.RawTextHelpFormatter):
10
+ def _get_help_string(self, action):
11
+ help_str = action.help if action.help else ""
12
+ if (
13
+ "%(default)" not in help_str
14
+ and action.default is not argparse.SUPPRESS
15
+ and action.default is not None
16
+ # default information can be deceptive or confusing for boolean flags.
17
+ # For example, `--quiet` says "Does not print progress information. (default: True)" even though
18
+ # the default action is to NOT be quiet (to the user, the default is False).
19
+ and not isinstance(action, argparse._StoreTrueAction)
20
+ and not isinstance(action, argparse._StoreFalseAction)
21
+ ):
22
+ help_str += " (default: %(default)s)"
23
+ return help_str
24
+
25
+
26
+ def main(): # noqa: C901
27
+ """
28
+ Function containing argparse parsers and arguments to allow the use of cellsweep from the terminal (as cellsweep).
29
+ """
30
+
31
+ parent_parser = argparse.ArgumentParser(description=f"cellsweep v{__version__}", add_help=False) # Define parent parser
32
+ parent_subparsers = parent_parser.add_subparsers(dest="command") # Initiate subparsers
33
+ parent = argparse.ArgumentParser(add_help=False)
34
+
35
+ # Add custom help argument to parent parser
36
+ parent_parser.add_argument("-h", "--help", action="store_true", help="Print manual.")
37
+ # Add custom version argument to parent parser
38
+ parent_parser.add_argument("-v", "--version", action="store_true", help="Print version.")
39
+
40
+ denoise_count_matrix_desc = "Denoise count matrix using cellsweep."
41
+
42
+ parser_denoise_count_matrix = parent_subparsers.add_parser(
43
+ "denoise_count_matrix",
44
+ parents=[parent],
45
+ description=denoise_count_matrix_desc,
46
+ help=denoise_count_matrix_desc,
47
+ add_help=True,
48
+ formatter_class=CustomHelpFormatter,
49
+ )
50
+
51
+ parser_denoise_count_matrix.add_argument(
52
+ "adata",
53
+ type=str,
54
+ help="Path to input AnnData file (.h5ad) containing raw count matrix in .X.",
55
+ )
56
+ parser_denoise_count_matrix.add_argument(
57
+ "-o",
58
+ "--adata_out",
59
+ type=str,
60
+ default="adata_denoised.h5ad",
61
+ help="Path to output AnnData file (.h5ad) to save denoised count matrix.",
62
+ )
63
+ parser_denoise_count_matrix.add_argument(
64
+ "--max_iter",
65
+ type=int,
66
+ default=2000,
67
+ help="Maximum number of EM iterations.",
68
+ )
69
+ parser_denoise_count_matrix.add_argument(
70
+ "--init_alpha",
71
+ type=float,
72
+ default=0.9,
73
+ help="Initial value of alpha_n for each cell.",
74
+ )
75
+ parser_denoise_count_matrix.add_argument(
76
+ "--alpha_cap",
77
+ type=float,
78
+ default=0.9,
79
+ help="alpha_n is not allowed to surpass this value in the first stage of training (before ll convergence). Barcodes that attempt to pass this threshold will be excluded from updating p_k and allowed to change cell-types.",
80
+ )
81
+ parser_denoise_count_matrix.add_argument(
82
+ "--init_beta",
83
+ type=float,
84
+ default=0.1,
85
+ help="Initial beta (percent bulk contamination) value for each cell.",
86
+ )
87
+ parser_denoise_count_matrix.add_argument(
88
+ "--eps",
89
+ type=float,
90
+ default=1e-12,
91
+ help="Numerical stability constant to prevent division by zero).",
92
+ )
93
+ parser_denoise_count_matrix.add_argument(
94
+ "--log_eps",
95
+ type=float,
96
+ default=1e-300,
97
+ help="Numerical stability constant to prevent log(0).",
98
+ )
99
+ parser_denoise_count_matrix.add_argument(
100
+ "--celltype_lambda",
101
+ type=float,
102
+ default=10,
103
+ help="Pseudocount for celltype profile update. Higher values lead to smoother celltype profiles",
104
+ )
105
+ parser_denoise_count_matrix.add_argument(
106
+ "--ambient_lambda",
107
+ type=float,
108
+ default=50,
109
+ help="Pseudocount for ambient profile update. Higher values lead to a smoother ambient profile.",
110
+ )
111
+ parser_denoise_count_matrix.add_argument(
112
+ "--bulk_lambda",
113
+ type=float,
114
+ default=10,
115
+ help="Pseudocount for bulk profile update. Higher values lead to a smoother bulk profile.",
116
+ )
117
+ parser_denoise_count_matrix.add_argument(
118
+ "--repulsion_strength",
119
+ type=float,
120
+ default=1e-4,
121
+ help="Strength of repulsion between ambient and cell-type profiles during M-step. Higher values lead to greater separation between ambient and cell-type profiles.",
122
+ )
123
+ parser_denoise_count_matrix.add_argument(
124
+ "--max_frac_gene_repulsion",
125
+ type=float,
126
+ default=0.2,
127
+ help="Maximum fraction of each p_k entry that can be subtracted during repulsion.",
128
+ )
129
+ parser_denoise_count_matrix.add_argument(
130
+ "--round_X",
131
+ action="store_true",
132
+ help="If True, rounds denoised counts to nearest integer before saving.",
133
+ )
134
+ parser_denoise_count_matrix.add_argument(
135
+ "-t", "--threads",
136
+ type=int,
137
+ default=1,
138
+ help="number of numba threads",
139
+ )
140
+ parser_denoise_count_matrix.add_argument(
141
+ "--disable_freeze_empty",
142
+ action="store_false",
143
+ help="If True, does not attempt to reestimate empty droplets."
144
+ )
145
+ parser_denoise_count_matrix.add_argument(
146
+ "--disable_freeze_ambient_profile",
147
+ action="store_false",
148
+ help="If True, does not update the ambient profile (a) based on alpha."
149
+ )
150
+ parser_denoise_count_matrix.add_argument(
151
+ "--empty_droplet_method",
152
+ type=str,
153
+ default="threshold",
154
+ choices=["threshold"],
155
+ help="Strategy to infer empty droplets if `is_empty` is not present."
156
+ )
157
+ parser_denoise_count_matrix.add_argument(
158
+ "--umi_cutoff",
159
+ type=int,
160
+ default=None,
161
+ help="Optional absolute UMI count threshold for classifying droplets as empty."
162
+ )
163
+ parser_denoise_count_matrix.add_argument(
164
+ "--expected_cells",
165
+ type=int,
166
+ default=None,
167
+ help="Expected number of real cells, used when estimating thresholds."
168
+ )
169
+ parser_denoise_count_matrix.add_argument(
170
+ "--del0_ll_tol",
171
+ type=float,
172
+ default=1e-3,
173
+ help="The change in likelihood, relative to the first likelihood step, below which repulsion and cell-type reassignment are discontinued and convergence is checked."
174
+ )
175
+ parser_denoise_count_matrix.add_argument(
176
+ "--min_ll_tol",
177
+ type=float,
178
+ default=1e-6,
179
+ help="The change in likelihood, relative to the current likelihood step, below which repulsion and cell-type reassignment are discontinued and convergence is checked. This is intended to cap `del0_ll_tol` at the edge of floating-point precision."
180
+ )
181
+ parser_denoise_count_matrix.add_argument(
182
+ "--tol_p",
183
+ type=float,
184
+ default=1e-4,
185
+ help="The maximum change in p below which training is discontinued. This is in addition to the tol_f stopping criterion.",
186
+ )
187
+ parser_denoise_count_matrix.add_argument(
188
+ "--tol_f",
189
+ type=float,
190
+ default=1e-4,
191
+ help="The maximum change in f = (1 - beta) * alpha + beta, below which training is discontinued. This is in addition to the tol_p stopping criterion.",
192
+ )
193
+ parser_denoise_count_matrix.add_argument(
194
+ "--random_state",
195
+ type=int,
196
+ default=42,
197
+ help="Random seed.",
198
+ )
199
+ parser_denoise_count_matrix.add_argument(
200
+ "-v", "--verbose",
201
+ action="count",
202
+ default=0,
203
+ help="Verbosity level. Default logging.WARNING, -v logging.INFO, -vv for logging.DEBUG)"
204
+ )
205
+ parser_denoise_count_matrix.add_argument(
206
+ "--quiet",
207
+ action="store_true",
208
+ help="Suppress all output (overrides any verbose flag)",
209
+ )
210
+ # no need because adata is always a file path in CLI
211
+ # parser_denoise_count_matrix.add_argument(
212
+ # "--disable_copy_anndata",
213
+ # action="store_false",
214
+ # help="If adata is an Anndata object, then copy it to avoid modifying the input in-place."
215
+ # )
216
+ parser_denoise_count_matrix.add_argument(
217
+ "--log_file",
218
+ type=str,
219
+ default=None,
220
+ help="Optional path to save EM iteration logs.",
221
+ )
222
+
223
+ args, unknown_args = parent_parser.parse_known_args()
224
+
225
+ # Help return
226
+ if args.help:
227
+ # Retrieve all subparsers from the parent parser
228
+ subparsers_actions = [action for action in parent_parser._actions if isinstance(action, argparse._SubParsersAction)]
229
+ for subparsers_action in subparsers_actions:
230
+ # Get all subparsers and print help
231
+ for choice, subparser in subparsers_action.choices.items():
232
+ print("Subparser '{}'".format(choice))
233
+ print(subparser.format_help())
234
+ sys.exit(1)
235
+
236
+ # Version return
237
+ if args.version:
238
+ print(f"varseek version: {__version__}")
239
+ sys.exit(1)
240
+
241
+ # Show help when no arguments are given
242
+ if len(sys.argv) == 1:
243
+ parent_parser.print_help(sys.stderr)
244
+ sys.exit(1)
245
+
246
+ command_to_parser = {
247
+ "denoise_count_matrix": parser_denoise_count_matrix,
248
+ }
249
+
250
+ if len(sys.argv) == 2:
251
+ if sys.argv[1] in command_to_parser:
252
+ command_to_parser[sys.argv[1]].print_help(sys.stderr)
253
+ else:
254
+ parent_parser.print_help(sys.stderr)
255
+ sys.exit(1)
256
+
257
+ if args.command == "denoise_count_matrix":
258
+ denoise_count_matrix(
259
+ adata=args.adata,
260
+ adata_out=args.adata_out,
261
+ max_iter=args.max_iter,
262
+ init_alpha=args.init_alpha,
263
+ alpha_cap=args.alpha_cap,
264
+ beta=args.int_beta,
265
+ eps=args.eps,
266
+ log_eps=args.log_eps,
267
+ celltype_lambda=args.celltype_lambda,
268
+ ambient_lambda=args.ambient_lambda,
269
+ bulk_lambda=args.bulk_lambda,
270
+ repulsion_strength=args.repulsion_strength,
271
+ max_frac_gene_repulsion=args.max_frac_gene_repulsion,
272
+ round_X=args.round_X,
273
+ threads=args.threads,
274
+ freeze_empty=args.disable_freeze_empty,
275
+ empty_droplet_method=args.empty_droplet_method,
276
+ umi_cutoff=args.umi_cutoff,
277
+ expected_cells=args.expected_cells,
278
+ tol=args.tol,
279
+ min_tol=args.min_tol,
280
+ tol_p=args.tol_p,
281
+ tol_f=args.tol_f,
282
+ random_state=args.random_state,
283
+ verbose=args.verbose,
284
+ quiet=args.quiet,
285
+ log_file=args.log_file,
286
+ )
287
+