SURE-tools 1.0.8__py3-none-any.whl → 1.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of SURE-tools might be problematic. Click here for more details.
- SURE/SURE.py +3 -0
- SURE/SURE2.py +1236 -0
- SURE/__init__.py +4 -1
- SURE/atac/__init__.py +1 -0
- SURE/atac/utils.py +151 -0
- {sure_tools-1.0.8.dist-info → sure_tools-1.0.10.dist-info}/METADATA +1 -1
- sure_tools-1.0.10.dist-info/RECORD +20 -0
- sure_tools-1.0.8.dist-info/RECORD +0 -17
- {sure_tools-1.0.8.dist-info → sure_tools-1.0.10.dist-info}/WHEEL +0 -0
- {sure_tools-1.0.8.dist-info → sure_tools-1.0.10.dist-info}/entry_points.txt +0 -0
- {sure_tools-1.0.8.dist-info → sure_tools-1.0.10.dist-info}/licenses/LICENSE +0 -0
- {sure_tools-1.0.8.dist-info → sure_tools-1.0.10.dist-info}/top_level.txt +0 -0
SURE/__init__.py
CHANGED
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
from .SURE import SURE
|
|
2
|
+
from .SURE2 import SURE2
|
|
2
3
|
|
|
3
4
|
from . import utils
|
|
4
5
|
from . import codebook
|
|
5
6
|
from . import SURE
|
|
7
|
+
from . import SURE2
|
|
8
|
+
from . import atac
|
|
6
9
|
|
|
7
|
-
__all__ = ['SURE', 'utils', 'codebook']
|
|
10
|
+
__all__ = ['SURE','SURE2', 'atac', 'utils', 'codebook']
|
SURE/atac/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .utils import tfidf, binarize
|
SURE/atac/utils.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
from typing import Union, Optional
|
|
2
|
+
from warnings import warn
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
from scipy.sparse import csr_matrix, dia_matrix, issparse
|
|
6
|
+
from sklearn.preprocessing import MinMaxScaler
|
|
7
|
+
|
|
8
|
+
from anndata import AnnData
|
|
9
|
+
|
|
10
|
+
import scanpy as sc
|
|
11
|
+
from scanpy._utils import view_to_actual
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
# Computational methods for preprocessing
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def tfidf(
    data: AnnData,
    log_tf: bool = True,
    log_idf: bool = True,
    log_tfidf: bool = False,
    scale_factor: Union[int, float] = 1e4,
    inplace: bool = True,
    copy: bool = False,
    from_layer: Optional[str] = None,
    to_layer: Optional[str] = None,
):
    """
    Transform peak counts with TF-IDF (Term Frequency - Inverse Document Frequency).

    TF: peak counts divided by the total number of counts per cell (row sum),
        then multiplied by ``scale_factor``
    DF: total number of counts for each peak (column sum)
    IDF: number of cells divided by DF

    With the default flags the transformed matrix is
    ``log1p(TF * scale_factor) * log1p(IDF)``.

    Cells or peaks whose total count is zero get a TF (respectively IDF) of 0
    instead of triggering a division by zero, so their transformed values are 0.

    Parameters
    ----------
    data
        AnnData object with peak counts.
    log_tf
        Apply ``log1p`` to the (scaled) TF term (True by default).
    log_idf
        Apply ``log1p`` to the IDF term (True by default).
    log_tfidf
        Apply ``log1p`` to the TF*IDF product (False by default).
        Can only be used when log_tf and log_idf are False.
    scale_factor
        Scale factor to multiply the TF matrix by (1e4 by default).
        ``None``, 0 and 1 all disable scaling.
    inplace
        Whether to store the result in the AnnData object (True by default).
        If False, the transformed matrix is returned and the object is not
        written to.
    copy
        Whether to operate on, and return, a copy of the AnnData object
        (False by default). Not compatible with inplace=False.
    from_layer
        Layer to read counts from (AnnData.layers[from_layer])
        instead of AnnData.X used by default.
    to_layer
        Layer to save transformed counts to (AnnData.layers[to_layer])
        instead of AnnData.X used by default.
        Not compatible with inplace=False.

    Returns
    -------
    The transformed sparse matrix if ``inplace=False``; the modified copy of
    the AnnData object if ``copy=True``; otherwise ``None``.

    Raises
    ------
    TypeError
        If ``data`` is not an AnnData object.
    AttributeError
        If ``log_tfidf`` is combined with ``log_tf`` or ``log_idf``.
    ValueError
        If ``copy=True`` or ``to_layer`` is combined with ``inplace=False``.
    """
    if not isinstance(data, AnnData):
        raise TypeError("Expected AnnData object")
    adata = data

    if log_tfidf and (log_tf or log_idf):
        # Adjacent literals instead of a backslash continuation inside the
        # string, which would embed the source indentation in the message.
        raise AttributeError(
            "When returning log(TF*IDF), "
            "applying neither log(TF) nor log(IDF) is possible."
        )

    if copy and not inplace:
        raise ValueError("`copy=True` cannot be used with `inplace=False`.")

    if to_layer is not None and not inplace:
        raise ValueError(f"`to_layer='{str(to_layer)}'` cannot be used with `inplace=False`.")

    if copy:
        adata = adata.copy()

    view_to_actual(adata)

    counts = adata.X if from_layer is None else adata.layers[from_layer]

    # Check before the computation
    if to_layer is not None and to_layer in adata.layers:
        warn(f"Existing layer '{str(to_layer)}' will be overwritten")

    # --- TF: divide every row by its total, leaving all-zero rows at 0 ---
    cell_totals = np.asarray(counts.sum(axis=1), dtype=float).reshape(-1)
    if issparse(counts):
        inv_totals = np.zeros_like(cell_totals)
        np.divide(1.0, cell_totals, out=inv_totals, where=cell_totals != 0)
        # Left-multiplying by a diagonal matrix scales the rows while
        # keeping TF sparse.
        row_scaler = dia_matrix((inv_totals, 0), shape=(inv_totals.size, inv_totals.size))
        tf = row_scaler @ counts
    else:
        totals_col = cell_totals.reshape(-1, 1)
        tf = np.divide(
            counts,
            totals_col,
            out=np.zeros(counts.shape, dtype=float),
            where=totals_col != 0,
        )

    if scale_factor is not None and scale_factor != 0 and scale_factor != 1:
        tf = tf * scale_factor
    if log_tf:
        tf = tf.log1p() if issparse(tf) else np.log1p(tf)

    # --- IDF: number of cells over per-peak totals, 0 for empty peaks ---
    peak_totals = np.asarray(counts.sum(axis=0), dtype=float).reshape(-1)
    idf = np.zeros_like(peak_totals)
    np.divide(adata.shape[0], peak_totals, out=idf, where=peak_totals != 0)
    if log_idf:
        idf = np.log1p(idf)

    # --- TF * IDF: right-multiply by diag(IDF) to scale the columns ---
    # The diagonal is built as a sparse matrix in both branches; a dense
    # np.diag(idf) would need n_peaks x n_peaks memory.
    tf_sparse = tf if issparse(tf) else csr_matrix(tf)
    tf_idf = tf_sparse @ dia_matrix((idf, 0), shape=(idf.size, idf.size))

    if log_tfidf:
        tf_idf = tf_idf.log1p()

    # np.nan_to_num does not look inside a sparse matrix, so clean the
    # stored values directly.
    tf_idf.data = np.nan_to_num(tf_idf.data, nan=0.0)
    res = tf_idf

    if not inplace:
        return res

    if to_layer is not None:
        adata.layers[to_layer] = res
    else:
        adata.X = res

    if copy:
        return adata
    return None
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
def binarize(data: AnnData, threshold: np.float32=0):
    """
    Transform peak counts to a binary matrix, in place.

    Values strictly greater than ``threshold`` become 1 and all other
    values become 0. With the default ``threshold=0`` and non-negative
    counts this means every non-zero value becomes 1.

    Parameters
    ----------
    data
        AnnData object with peak counts. ``data.X`` is overwritten.
    threshold
        Cut-off above which a value is set to 1 (0 by default).
        For a sparse ``X`` only the explicitly stored entries are examined,
        so implicit zeros stay 0 even when ``threshold`` is negative.

    Raises
    ------
    TypeError
        If ``data`` is not an AnnData object.
    """
    if not isinstance(data, AnnData):
        raise TypeError("Expected AnnData object")
    adata = data

    matrix = adata.X
    if issparse(matrix):
        # Sparse matrix: rewrite the stored values only; the sparsity
        # structure is left untouched.
        stored = matrix.data
        stored[...] = stored > threshold
    else:
        # The comparison result is written back into the existing array,
        # so entries at or below the threshold are reset to 0 as well.
        matrix[...] = matrix > threshold
|
|
150
|
+
|
|
151
|
+
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
SURE/SURE.py,sha256=mEvzoIvjZUSO0UYSrb-CyGJtkFEweT92_XMQor_lsk0,48381
|
|
2
|
+
SURE/SURE2.py,sha256=8wlnMwb1xuf9QUksNkWdWx5ZWq-xIy9NLx8RdUnE82o,48501
|
|
3
|
+
SURE/__init__.py,sha256=koEpBjG-Q6EaDAfs6qENhAdCuwk4RQMqYahPKTvbBf8,207
|
|
4
|
+
SURE/assembly/__init__.py,sha256=jxZLURXKPzXe21LhrZ09LgZr33iqdjlQy4oSEj5gR2Q,172
|
|
5
|
+
SURE/assembly/assembly.py,sha256=6IMdelPOiRO4mUb4dC7gVCoF1Uvfw86-Map8P_jnUag,21477
|
|
6
|
+
SURE/assembly/atlas.py,sha256=ALjmVWutm_tOHTcT1aqOxmuCEQw-XzrtDoMCV_8oXLk,21794
|
|
7
|
+
SURE/atac/__init__.py,sha256=3smP8IKHfwNCd1G_sZH3pKHXuLkLpFuLtjUTUSy7_As,34
|
|
8
|
+
SURE/atac/utils.py,sha256=m4NYwpy9O5T1pXTzgCOCcmlwrC6GTi-cQ5sm2wZu2O8,4354
|
|
9
|
+
SURE/codebook/__init__.py,sha256=2T5gjp8JIaBayrXAnOJYSebQHsWprOs87difpR1OPNw,243
|
|
10
|
+
SURE/codebook/codebook.py,sha256=ZlN6gRX9Gj2D2u3P5KeOsbZri0MoMAiJo9lNeL-MK-I,17117
|
|
11
|
+
SURE/utils/__init__.py,sha256=Htqv4KqVKcRiaaTBsR-6yZ4LSlbhbzutjNKXGD9-uds,660
|
|
12
|
+
SURE/utils/custom_mlp.py,sha256=07TYX1HgxfEjb_3i5MpiZfNhOhx3dKntuwGkrpteWiM,7036
|
|
13
|
+
SURE/utils/queue.py,sha256=E_5PA5EWcBoGAZj8BkKQnkCK0p4C-4-xcTPqdIXaPXU,1892
|
|
14
|
+
SURE/utils/utils.py,sha256=IUHjDDtYaAYllCWsZyIzqQwaLul6fJRvHRH4vIYcR-c,8462
|
|
15
|
+
sure_tools-1.0.10.dist-info/licenses/LICENSE,sha256=TFHKwmrAViXQbSX5W-NDItkWFjm45HWOeUniDrqmnu0,1065
|
|
16
|
+
sure_tools-1.0.10.dist-info/METADATA,sha256=_7AWIevn5I25CdG3zyV8I42XPr8Vycs71hpbS1aXxys,2651
|
|
17
|
+
sure_tools-1.0.10.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
18
|
+
sure_tools-1.0.10.dist-info/entry_points.txt,sha256=u12payZYgCBy5FCwRHP6AlSQhKCiWSEDwj68r1DVdn8,40
|
|
19
|
+
sure_tools-1.0.10.dist-info/top_level.txt,sha256=BtFTebdiJeqra4r6mm-uEtwVRFLZ_IjYsQ7OnalrOvY,5
|
|
20
|
+
sure_tools-1.0.10.dist-info/RECORD,,
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
SURE/SURE.py,sha256=3k28h-IMb2poRjoM5yq_u7FWO1Z2ixr_k6o05fF9eEE,48333
|
|
2
|
-
SURE/__init__.py,sha256=SbIRwAVBnNhza9vbsUH4N04atr0q_Abp04pCUTBhNio,127
|
|
3
|
-
SURE/assembly/__init__.py,sha256=jxZLURXKPzXe21LhrZ09LgZr33iqdjlQy4oSEj5gR2Q,172
|
|
4
|
-
SURE/assembly/assembly.py,sha256=6IMdelPOiRO4mUb4dC7gVCoF1Uvfw86-Map8P_jnUag,21477
|
|
5
|
-
SURE/assembly/atlas.py,sha256=ALjmVWutm_tOHTcT1aqOxmuCEQw-XzrtDoMCV_8oXLk,21794
|
|
6
|
-
SURE/codebook/__init__.py,sha256=2T5gjp8JIaBayrXAnOJYSebQHsWprOs87difpR1OPNw,243
|
|
7
|
-
SURE/codebook/codebook.py,sha256=ZlN6gRX9Gj2D2u3P5KeOsbZri0MoMAiJo9lNeL-MK-I,17117
|
|
8
|
-
SURE/utils/__init__.py,sha256=Htqv4KqVKcRiaaTBsR-6yZ4LSlbhbzutjNKXGD9-uds,660
|
|
9
|
-
SURE/utils/custom_mlp.py,sha256=07TYX1HgxfEjb_3i5MpiZfNhOhx3dKntuwGkrpteWiM,7036
|
|
10
|
-
SURE/utils/queue.py,sha256=E_5PA5EWcBoGAZj8BkKQnkCK0p4C-4-xcTPqdIXaPXU,1892
|
|
11
|
-
SURE/utils/utils.py,sha256=IUHjDDtYaAYllCWsZyIzqQwaLul6fJRvHRH4vIYcR-c,8462
|
|
12
|
-
sure_tools-1.0.8.dist-info/licenses/LICENSE,sha256=TFHKwmrAViXQbSX5W-NDItkWFjm45HWOeUniDrqmnu0,1065
|
|
13
|
-
sure_tools-1.0.8.dist-info/METADATA,sha256=DCo48fUYBlQSOetxmervWoZP3QTl-z7oeUnXiPJlrP8,2650
|
|
14
|
-
sure_tools-1.0.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
15
|
-
sure_tools-1.0.8.dist-info/entry_points.txt,sha256=u12payZYgCBy5FCwRHP6AlSQhKCiWSEDwj68r1DVdn8,40
|
|
16
|
-
sure_tools-1.0.8.dist-info/top_level.txt,sha256=BtFTebdiJeqra4r6mm-uEtwVRFLZ_IjYsQ7OnalrOvY,5
|
|
17
|
-
sure_tools-1.0.8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|