M3Drop 0.2.6__tar.gz → 0.4.40__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {m3drop-0.2.6 → m3drop-0.4.40}/LICENSE +2 -2
- {m3drop-0.2.6 → m3drop-0.4.40/M3Drop.egg-info}/PKG-INFO +137 -133
- m3drop-0.4.40/M3Drop.egg-info/SOURCES.txt +16 -0
- m3drop-0.4.40/M3Drop.egg-info/requires.txt +15 -0
- {m3drop-0.2.6/M3Drop.egg-info → m3drop-0.4.40}/PKG-INFO +137 -133
- m3drop-0.4.40/m3Drop/__init__.py +106 -0
- m3drop-0.4.40/m3Drop/coreCPU.py +477 -0
- m3drop-0.4.40/m3Drop/coreGPU.py +591 -0
- m3drop-0.4.40/m3Drop/diagnosticsCPU.py +391 -0
- m3drop-0.4.40/m3Drop/diagnosticsGPU.py +504 -0
- m3drop-0.4.40/m3Drop/normalizationCPU.py +146 -0
- m3drop-0.2.6/m3Drop/NormalizationGPU.py → m3drop-0.4.40/m3Drop/normalizationGPU.py +213 -201
- {m3drop-0.2.6 → m3drop-0.4.40}/setup.cfg +4 -4
- {m3drop-0.2.6 → m3drop-0.4.40}/setup.py +18 -14
- m3drop-0.2.6/M3Drop.egg-info/SOURCES.txt +0 -37
- m3drop-0.2.6/M3Drop.egg-info/requires.txt +0 -11
- m3drop-0.2.6/m3Drop/Brennecke_implementation.py +0 -139
- m3drop-0.2.6/m3Drop/Curve_fitting.py +0 -443
- m3drop-0.2.6/m3Drop/DANB_Coexpression.py +0 -99
- m3drop-0.2.6/m3Drop/DANB_HVG.py +0 -85
- m3drop-0.2.6/m3Drop/Extremes.py +0 -423
- m3drop-0.2.6/m3Drop/M3D_Imputation.py +0 -93
- m3drop-0.2.6/m3Drop/NB_UMI.py +0 -2364
- m3drop-0.2.6/m3Drop/Normalization.py +0 -257
- m3drop-0.2.6/m3Drop/Other_FS_functions.py +0 -431
- m3drop-0.2.6/m3Drop/Plotting_fxns.py +0 -270
- m3drop-0.2.6/m3Drop/Simulations_Functions.py +0 -335
- m3drop-0.2.6/m3Drop/Simulations_GPU.py +0 -411
- m3drop-0.2.6/m3Drop/Threeway_ProportionalArea_VennDiagrams.py +0 -305
- m3drop-0.2.6/m3Drop/Traditional_DE.py +0 -421
- m3drop-0.2.6/m3Drop/__init__.py +0 -219
- m3drop-0.2.6/m3Drop/basics.py +0 -726
- m3drop-0.2.6/m3Drop/scanpy.py +0 -461
- m3drop-0.2.6/tests/test_M3DropCleanData.py +0 -41
- m3drop-0.2.6/tests/test_M3DropConvertData.py +0 -58
- m3drop-0.2.6/tests/test_M3DropFeatureSelection.py +0 -178
- m3drop-0.2.6/tests/test_M3DropGetMarkers.py +0 -42
- m3drop-0.2.6/tests/test_M3DropTestShift.py +0 -64
- m3drop-0.2.6/tests/test_brennecke.py +0 -40
- m3drop-0.2.6/tests/test_gene_name_preservation.py +0 -147
- m3drop-0.2.6/tests/test_nbumi_no_plots.py +0 -225
- m3drop-0.2.6/tests/test_real_data_gene_preservation.py +0 -347
- m3drop-0.2.6/tests/test_sparse_optimization.py +0 -232
- {m3drop-0.2.6 → m3drop-0.4.40}/M3Drop.egg-info/dependency_links.txt +0 -0
- {m3drop-0.2.6 → m3drop-0.4.40}/M3Drop.egg-info/top_level.txt +0 -0
- {m3drop-0.2.6 → m3drop-0.4.40}/README.md +0 -0
- {m3drop-0.2.6 → m3drop-0.4.40}/pyproject.toml +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
MIT License
|
|
2
2
|
|
|
3
|
-
Copyright (c) 2025
|
|
3
|
+
Copyright (c) 2025 Tallulah Andrews
|
|
4
4
|
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
|
@@ -18,4 +18,4 @@ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
|
18
18
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
19
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
20
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
21
|
+
SOFTWARE.
|
|
@@ -1,133 +1,137 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: M3Drop
|
|
3
|
-
Version: 0.2.6
|
|
4
|
-
Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
|
|
5
|
-
Home-page: https://github.com/PragalvhaSharma/m3DropNew
|
|
6
|
-
Author: Tallulah Andrews
|
|
7
|
-
Author-email: tandrew6@uwo.ca
|
|
8
|
-
License: MIT
|
|
9
|
-
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
-
Classifier: Intended Audience :: Science/Research
|
|
13
|
-
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
14
|
-
Requires-Python: >=3.8
|
|
15
|
-
Description-Content-Type: text/markdown
|
|
16
|
-
License-File: LICENSE
|
|
17
|
-
Requires-Dist: anndata
|
|
18
|
-
Requires-Dist:
|
|
19
|
-
Requires-Dist: matplotlib
|
|
20
|
-
Requires-Dist:
|
|
21
|
-
Requires-Dist:
|
|
22
|
-
Requires-Dist:
|
|
23
|
-
Requires-Dist:
|
|
24
|
-
Requires-Dist:
|
|
25
|
-
Requires-Dist:
|
|
26
|
-
Requires-Dist:
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
Dynamic:
|
|
32
|
-
Dynamic:
|
|
33
|
-
Dynamic:
|
|
34
|
-
Dynamic:
|
|
35
|
-
Dynamic:
|
|
36
|
-
Dynamic:
|
|
37
|
-
Dynamic:
|
|
38
|
-
Dynamic:
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
##
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
- **
|
|
68
|
-
- **
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
- **
|
|
73
|
-
- **
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
```
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
#
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
#
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
#
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
#
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
##
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: M3Drop
|
|
3
|
+
Version: 0.4.40
|
|
4
|
+
Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
|
|
5
|
+
Home-page: https://github.com/PragalvhaSharma/m3DropNew
|
|
6
|
+
Author: Tallulah Andrews
|
|
7
|
+
Author-email: tandrew6@uwo.ca
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: anndata>=0.8.0
|
|
18
|
+
Requires-Dist: h5py>=3.8.0
|
|
19
|
+
Requires-Dist: matplotlib>=3.5.0
|
|
20
|
+
Requires-Dist: matplotlib-venn>=0.11
|
|
21
|
+
Requires-Dist: memory_profiler>=0.60.0
|
|
22
|
+
Requires-Dist: numpy>=1.21.0
|
|
23
|
+
Requires-Dist: pandas>=1.5.0
|
|
24
|
+
Requires-Dist: scanpy>=1.9.0
|
|
25
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
26
|
+
Requires-Dist: scipy>=1.8.0
|
|
27
|
+
Requires-Dist: seaborn>=0.11.0
|
|
28
|
+
Requires-Dist: statsmodels>=0.13.0
|
|
29
|
+
Provides-Extra: gpu
|
|
30
|
+
Requires-Dist: cupy-cuda12x; extra == "gpu"
|
|
31
|
+
Dynamic: author
|
|
32
|
+
Dynamic: author-email
|
|
33
|
+
Dynamic: classifier
|
|
34
|
+
Dynamic: description
|
|
35
|
+
Dynamic: description-content-type
|
|
36
|
+
Dynamic: home-page
|
|
37
|
+
Dynamic: license
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
Dynamic: provides-extra
|
|
40
|
+
Dynamic: requires-dist
|
|
41
|
+
Dynamic: requires-python
|
|
42
|
+
Dynamic: summary
|
|
43
|
+
|
|
44
|
+
# M3DropPy
|
|
45
|
+
|
|
46
|
+
A Python implementation of the M3Drop single-cell RNA-seq analysis tool, originally developed as an R package.
|
|
47
|
+
|
|
48
|
+
## About
|
|
49
|
+
|
|
50
|
+
M3DropPy is a Python conversion of the popular M3Drop R package for dropout-based feature selection in single-cell RNA sequencing data. This package provides powerful methods for identifying highly variable and differentially expressed genes by leveraging the high frequency of dropout events (zero expression values) that are characteristic of single-cell RNA-seq data.
|
|
51
|
+
|
|
52
|
+
## Background
|
|
53
|
+
|
|
54
|
+
Single-cell RNA sequencing often results in a large number of dropouts (genes with zero expression in particular cells) due to the technical challenges of reverse-transcribing and amplifying small quantities of RNA from individual cells. M3Drop takes advantage of this characteristic by modeling the relationship between dropout rate and mean expression using the Michaelis-Menten equation:
|
|
55
|
+
|
|
56
|
+
**P_i = 1 - S_i/(S_i + K)**
|
|
57
|
+
|
|
58
|
+
Where:
|
|
59
|
+
- P_i is the proportion of cells where gene i drops out
|
|
60
|
+
- S_i is the mean expression of gene i
|
|
61
|
+
- K is the Michaelis constant
|
|
62
|
+
|
|
63
|
+
## Key Features
|
|
64
|
+
|
|
65
|
+
### M3Drop Method
|
|
66
|
+
- **Michaelis-Menten Modeling**: Models dropout rates using enzyme kinetics principles
|
|
67
|
+
- **Feature Selection**: Identifies differentially expressed genes by detecting outliers from the fitted curve
|
|
68
|
+
- **Optimized for Smart-seq2**: Works best with full-transcript protocols without UMIs
|
|
69
|
+
|
|
70
|
+
### DANB (Depth-Adjusted Negative Binomial) Method
|
|
71
|
+
- **UMI Compatibility**: Specifically designed for UMI-tagged data (10X Chromium, etc.)
|
|
72
|
+
- **Depth Adjustment**: Accounts for sequencing depth variations across cells
|
|
73
|
+
- **Negative Binomial Modeling**: Models count data with appropriate variance structure
|
|
74
|
+
|
|
75
|
+
### Additional Methods
|
|
76
|
+
- **Brennecke Method**: Implementation of highly variable gene detection
|
|
77
|
+
- **Consensus Feature Selection**: Combines multiple feature selection approaches
|
|
78
|
+
- **Pearson Residuals**: Alternative normalization for UMI data
|
|
79
|
+
|
|
80
|
+
## Installation
|
|
81
|
+
|
|
82
|
+
You can install M3DropPy using pip:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pip install M3Drop
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Imports
|
|
89
|
+
|
|
90
|
+
You can import specific functions from different modules:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
from m3Drop.basics import your_function_name
|
|
94
|
+
from m3Drop.M3D_Imputation import another_function
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Usage
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from m3Drop.basics import M3DropConvertData, M3DropFeatureSelection
|
|
101
|
+
from m3Drop.NB_UMI import NBumiFitModel, NBumiFeatureSelectionCombinedDrop, NBumiPearsonResiduals
|
|
102
|
+
|
|
103
|
+
# Load your single-cell expression data
|
|
104
|
+
# counts should be a genes x cells matrix
|
|
105
|
+
|
|
106
|
+
# For non-UMI data (Smart-seq2, etc.)
|
|
107
|
+
# Convert and normalize data
|
|
108
|
+
norm_data = M3DropConvertData(counts, is_counts=True)
|
|
109
|
+
|
|
110
|
+
# Perform M3Drop feature selection
|
|
111
|
+
selected_genes = M3DropFeatureSelection(norm_data, mt_method="fdr", mt_threshold=0.01)
|
|
112
|
+
|
|
113
|
+
# For UMI data (10X Chromium, etc.)
|
|
114
|
+
# Fit DANB model
|
|
115
|
+
danb_fit = NBumiFitModel(counts)
|
|
116
|
+
|
|
117
|
+
# Perform dropout-based feature selection
|
|
118
|
+
selected_genes = NBumiFeatureSelectionCombinedDrop(danb_fit, method="fdr", qval_thres=0.01)
|
|
119
|
+
|
|
120
|
+
# Calculate Pearson residuals for normalization
|
|
121
|
+
pearson_residuals = NBumiPearsonResiduals(counts, danb_fit)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## When to Use Each Method
|
|
125
|
+
|
|
126
|
+
- **M3Drop**: Use for Smart-seq2 and other full-transcript protocols without UMIs
|
|
127
|
+
- **DANB/NBumi**: Use for UMI-tagged data like 10X Chromium
|
|
128
|
+
- **Consensus**: Use when you want to combine multiple feature selection approaches
|
|
129
|
+
|
|
130
|
+
## Original R Package
|
|
131
|
+
|
|
132
|
+
This Python implementation is based on the M3Drop R package, which was developed by Tallulah Andrews; the conversion to Python was carried out by Anthony Son and Pragalvha Sharma.
|
|
133
|
+
|
|
134
|
+
## Citation
|
|
135
|
+
|
|
136
|
+
If you use M3DropPy in your research, please cite the original M3Drop paper:
|
|
137
|
+
- Andrews, T.S. and Hemberg, M. (2019). M3Drop: Dropout-based feature selection for scRNASeq. Bioinformatics, 35(16), 2865-2867.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
LICENSE
|
|
2
|
+
README.md
|
|
3
|
+
pyproject.toml
|
|
4
|
+
setup.py
|
|
5
|
+
M3Drop.egg-info/PKG-INFO
|
|
6
|
+
M3Drop.egg-info/SOURCES.txt
|
|
7
|
+
M3Drop.egg-info/dependency_links.txt
|
|
8
|
+
M3Drop.egg-info/requires.txt
|
|
9
|
+
M3Drop.egg-info/top_level.txt
|
|
10
|
+
m3Drop/__init__.py
|
|
11
|
+
m3Drop/coreCPU.py
|
|
12
|
+
m3Drop/coreGPU.py
|
|
13
|
+
m3Drop/diagnosticsCPU.py
|
|
14
|
+
m3Drop/diagnosticsGPU.py
|
|
15
|
+
m3Drop/normalizationCPU.py
|
|
16
|
+
m3Drop/normalizationGPU.py
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
anndata>=0.8.0
|
|
2
|
+
h5py>=3.8.0
|
|
3
|
+
matplotlib>=3.5.0
|
|
4
|
+
matplotlib-venn>=0.11
|
|
5
|
+
memory_profiler>=0.60.0
|
|
6
|
+
numpy>=1.21.0
|
|
7
|
+
pandas>=1.5.0
|
|
8
|
+
scanpy>=1.9.0
|
|
9
|
+
scikit-learn>=1.0.0
|
|
10
|
+
scipy>=1.8.0
|
|
11
|
+
seaborn>=0.11.0
|
|
12
|
+
statsmodels>=0.13.0
|
|
13
|
+
|
|
14
|
+
[gpu]
|
|
15
|
+
cupy-cuda12x
|
|
@@ -1,133 +1,137 @@
|
|
|
1
|
-
Metadata-Version: 2.4
|
|
2
|
-
Name: M3Drop
|
|
3
|
-
Version: 0.2.6
|
|
4
|
-
Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
|
|
5
|
-
Home-page: https://github.com/PragalvhaSharma/m3DropNew
|
|
6
|
-
Author: Tallulah Andrews
|
|
7
|
-
Author-email: tandrew6@uwo.ca
|
|
8
|
-
License: MIT
|
|
9
|
-
Classifier: Programming Language :: Python :: 3
|
|
10
|
-
Classifier: Operating System :: OS Independent
|
|
11
|
-
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
-
Classifier: Intended Audience :: Science/Research
|
|
13
|
-
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
14
|
-
Requires-Python: >=3.8
|
|
15
|
-
Description-Content-Type: text/markdown
|
|
16
|
-
License-File: LICENSE
|
|
17
|
-
Requires-Dist: anndata
|
|
18
|
-
Requires-Dist:
|
|
19
|
-
Requires-Dist: matplotlib
|
|
20
|
-
Requires-Dist:
|
|
21
|
-
Requires-Dist:
|
|
22
|
-
Requires-Dist:
|
|
23
|
-
Requires-Dist:
|
|
24
|
-
Requires-Dist:
|
|
25
|
-
Requires-Dist:
|
|
26
|
-
Requires-Dist:
|
|
27
|
-
Requires-Dist:
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
Dynamic:
|
|
32
|
-
Dynamic:
|
|
33
|
-
Dynamic:
|
|
34
|
-
Dynamic:
|
|
35
|
-
Dynamic:
|
|
36
|
-
Dynamic:
|
|
37
|
-
Dynamic:
|
|
38
|
-
Dynamic:
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
##
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
- **
|
|
68
|
-
- **
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
- **
|
|
73
|
-
- **
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
```
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
#
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
#
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
#
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
#
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
##
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: M3Drop
|
|
3
|
+
Version: 0.4.40
|
|
4
|
+
Summary: A Python implementation of the M3Drop single-cell RNA-seq analysis tool.
|
|
5
|
+
Home-page: https://github.com/PragalvhaSharma/m3DropNew
|
|
6
|
+
Author: Tallulah Andrews
|
|
7
|
+
Author-email: tandrew6@uwo.ca
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
|
|
14
|
+
Requires-Python: >=3.8
|
|
15
|
+
Description-Content-Type: text/markdown
|
|
16
|
+
License-File: LICENSE
|
|
17
|
+
Requires-Dist: anndata>=0.8.0
|
|
18
|
+
Requires-Dist: h5py>=3.8.0
|
|
19
|
+
Requires-Dist: matplotlib>=3.5.0
|
|
20
|
+
Requires-Dist: matplotlib-venn>=0.11
|
|
21
|
+
Requires-Dist: memory_profiler>=0.60.0
|
|
22
|
+
Requires-Dist: numpy>=1.21.0
|
|
23
|
+
Requires-Dist: pandas>=1.5.0
|
|
24
|
+
Requires-Dist: scanpy>=1.9.0
|
|
25
|
+
Requires-Dist: scikit-learn>=1.0.0
|
|
26
|
+
Requires-Dist: scipy>=1.8.0
|
|
27
|
+
Requires-Dist: seaborn>=0.11.0
|
|
28
|
+
Requires-Dist: statsmodels>=0.13.0
|
|
29
|
+
Provides-Extra: gpu
|
|
30
|
+
Requires-Dist: cupy-cuda12x; extra == "gpu"
|
|
31
|
+
Dynamic: author
|
|
32
|
+
Dynamic: author-email
|
|
33
|
+
Dynamic: classifier
|
|
34
|
+
Dynamic: description
|
|
35
|
+
Dynamic: description-content-type
|
|
36
|
+
Dynamic: home-page
|
|
37
|
+
Dynamic: license
|
|
38
|
+
Dynamic: license-file
|
|
39
|
+
Dynamic: provides-extra
|
|
40
|
+
Dynamic: requires-dist
|
|
41
|
+
Dynamic: requires-python
|
|
42
|
+
Dynamic: summary
|
|
43
|
+
|
|
44
|
+
# M3DropPy
|
|
45
|
+
|
|
46
|
+
A Python implementation of the M3Drop single-cell RNA-seq analysis tool, originally developed as an R package.
|
|
47
|
+
|
|
48
|
+
## About
|
|
49
|
+
|
|
50
|
+
M3DropPy is a Python conversion of the popular M3Drop R package for dropout-based feature selection in single-cell RNA sequencing data. This package provides powerful methods for identifying highly variable and differentially expressed genes by leveraging the high frequency of dropout events (zero expression values) that are characteristic of single-cell RNA-seq data.
|
|
51
|
+
|
|
52
|
+
## Background
|
|
53
|
+
|
|
54
|
+
Single-cell RNA sequencing often results in a large number of dropouts (genes with zero expression in particular cells) due to the technical challenges of reverse-transcribing and amplifying small quantities of RNA from individual cells. M3Drop takes advantage of this characteristic by modeling the relationship between dropout rate and mean expression using the Michaelis-Menten equation:
|
|
55
|
+
|
|
56
|
+
**P_i = 1 - S_i/(S_i + K)**
|
|
57
|
+
|
|
58
|
+
Where:
|
|
59
|
+
- P_i is the proportion of cells where gene i drops out
|
|
60
|
+
- S_i is the mean expression of gene i
|
|
61
|
+
- K is the Michaelis constant
|
|
62
|
+
|
|
63
|
+
## Key Features
|
|
64
|
+
|
|
65
|
+
### M3Drop Method
|
|
66
|
+
- **Michaelis-Menten Modeling**: Models dropout rates using enzyme kinetics principles
|
|
67
|
+
- **Feature Selection**: Identifies differentially expressed genes by detecting outliers from the fitted curve
|
|
68
|
+
- **Optimized for Smart-seq2**: Works best with full-transcript protocols without UMIs
|
|
69
|
+
|
|
70
|
+
### DANB (Depth-Adjusted Negative Binomial) Method
|
|
71
|
+
- **UMI Compatibility**: Specifically designed for UMI-tagged data (10X Chromium, etc.)
|
|
72
|
+
- **Depth Adjustment**: Accounts for sequencing depth variations across cells
|
|
73
|
+
- **Negative Binomial Modeling**: Models count data with appropriate variance structure
|
|
74
|
+
|
|
75
|
+
### Additional Methods
|
|
76
|
+
- **Brennecke Method**: Implementation of highly variable gene detection
|
|
77
|
+
- **Consensus Feature Selection**: Combines multiple feature selection approaches
|
|
78
|
+
- **Pearson Residuals**: Alternative normalization for UMI data
|
|
79
|
+
|
|
80
|
+
## Installation
|
|
81
|
+
|
|
82
|
+
You can install M3DropPy using pip:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pip install M3Drop
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Imports
|
|
89
|
+
|
|
90
|
+
You can import specific functions from different modules:
|
|
91
|
+
|
|
92
|
+
```python
|
|
93
|
+
from m3Drop.basics import your_function_name
|
|
94
|
+
from m3Drop.M3D_Imputation import another_function
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Usage
|
|
98
|
+
|
|
99
|
+
```python
|
|
100
|
+
from m3Drop.basics import M3DropConvertData, M3DropFeatureSelection
|
|
101
|
+
from m3Drop.NB_UMI import NBumiFitModel, NBumiFeatureSelectionCombinedDrop, NBumiPearsonResiduals
|
|
102
|
+
|
|
103
|
+
# Load your single-cell expression data
|
|
104
|
+
# counts should be a genes x cells matrix
|
|
105
|
+
|
|
106
|
+
# For non-UMI data (Smart-seq2, etc.)
|
|
107
|
+
# Convert and normalize data
|
|
108
|
+
norm_data = M3DropConvertData(counts, is_counts=True)
|
|
109
|
+
|
|
110
|
+
# Perform M3Drop feature selection
|
|
111
|
+
selected_genes = M3DropFeatureSelection(norm_data, mt_method="fdr", mt_threshold=0.01)
|
|
112
|
+
|
|
113
|
+
# For UMI data (10X Chromium, etc.)
|
|
114
|
+
# Fit DANB model
|
|
115
|
+
danb_fit = NBumiFitModel(counts)
|
|
116
|
+
|
|
117
|
+
# Perform dropout-based feature selection
|
|
118
|
+
selected_genes = NBumiFeatureSelectionCombinedDrop(danb_fit, method="fdr", qval_thres=0.01)
|
|
119
|
+
|
|
120
|
+
# Calculate Pearson residuals for normalization
|
|
121
|
+
pearson_residuals = NBumiPearsonResiduals(counts, danb_fit)
|
|
122
|
+
```
|
|
123
|
+
|
|
124
|
+
## When to Use Each Method
|
|
125
|
+
|
|
126
|
+
- **M3Drop**: Use for Smart-seq2 and other full-transcript protocols without UMIs
|
|
127
|
+
- **DANB/NBumi**: Use for UMI-tagged data like 10X Chromium
|
|
128
|
+
- **Consensus**: Use when you want to combine multiple feature selection approaches
|
|
129
|
+
|
|
130
|
+
## Original R Package
|
|
131
|
+
|
|
132
|
+
This Python implementation is based on the M3Drop R package, which was developed by Tallulah Andrews; the conversion to Python was carried out by Anthony Son and Pragalvha Sharma.
|
|
133
|
+
|
|
134
|
+
## Citation
|
|
135
|
+
|
|
136
|
+
If you use M3DropPy in your research, please cite the original M3Drop paper:
|
|
137
|
+
- Andrews, T.S. and Hemberg, M. (2019). M3Drop: Dropout-based feature selection for scRNASeq. Bioinformatics, 35(16), 2865-2867.
|