SURE-tools 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of SURE-tools might be problematic. Click here for more details.

SURE/SURE.py CHANGED
@@ -22,6 +22,9 @@ import datatable as dt
22
22
  from tqdm import tqdm
23
23
  from scipy import sparse
24
24
 
25
+ import scanpy as sc
26
+ from .atac import binarize
27
+
25
28
  from typing import Literal
26
29
 
27
30
  import warnings
SURE/__init__.py CHANGED
@@ -3,5 +3,6 @@ from .SURE import SURE
3
3
  from . import utils
4
4
  from . import codebook
5
5
  from . import SURE
6
+ from . import atac
6
7
 
7
- __all__ = ['SURE', 'utils', 'codebook']
8
+ __all__ = ['SURE', 'atac', 'utils', 'codebook']
SURE/atac/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .utils import tfidf, binarize
SURE/atac/utils.py ADDED
@@ -0,0 +1,151 @@
1
+ from typing import Union, Optional
2
+ from warnings import warn
3
+
4
+ import numpy as np
5
+ from scipy.sparse import csr_matrix, dia_matrix, issparse
6
+ from sklearn.preprocessing import MinMaxScaler
7
+
8
+ from anndata import AnnData
9
+
10
+ import scanpy as sc
11
+ from scanpy._utils import view_to_actual
12
+
13
+
14
+ # Computational methods for preprocessing
15
+
16
+
17
def tfidf(
    data: AnnData,
    log_tf: bool = True,
    log_idf: bool = True,
    log_tfidf: bool = False,
    scale_factor: Union[int, float] = 1e4,
    inplace: bool = True,
    copy: bool = False,
    from_layer: Optional[str] = None,
    to_layer: Optional[str] = None,
):
    """
    Transform peak counts with TF-IDF (Term Frequency - Inverse Document Frequency).

    TF: peak counts are normalised by total number of counts per cell
    DF: total number of counts for each peak
    IDF: number of cells divided by DF

    By default, log1p(TF * scale_factor) * log1p(IDF) is computed.

    Parameters
    ----------
    data
        AnnData object with peak counts.
    log_tf
        Log-transform (log1p) the TF term (True by default).
    log_idf
        Log-transform (log1p) the IDF term (True by default).
    log_tfidf
        Log-transform (log1p) the TF*IDF product (False by default).
        Can only be used when log_tf and log_idf are False.
    scale_factor
        Scale factor to multiply the TF matrix by (1e4 by default).
        Values of None, 0 and 1 skip the scaling step.
    inplace
        If to modify counts in the AnnData object (True by default).
    copy
        If to work on and return a copy of the AnnData object (False by default).
        Not compatible with inplace=False.
    from_layer
        Layer to use counts (AnnData.layers[from_layer])
        instead of AnnData.X used by default.
    to_layer
        Layer to save transformed counts to (AnnData.layers[to_layer])
        instead of AnnData.X used by default.
        Not compatible with inplace=False.

    Returns
    -------
    The TF-IDF matrix (SciPy sparse) when ``inplace=False``; the modified copy
    of the AnnData object when ``copy=True``; otherwise ``None`` (the input
    object is modified in place).

    Raises
    ------
    TypeError
        If ``data`` is not an AnnData object.
    AttributeError
        If ``log_tfidf`` is combined with ``log_tf`` or ``log_idf``.
    ValueError
        If ``copy=True`` or ``to_layer`` is combined with ``inplace=False``.
    """
    if not isinstance(data, AnnData):
        raise TypeError("Expected AnnData object")
    adata = data

    if log_tfidf and (log_tf or log_idf):
        # Exception type kept as AttributeError so existing callers that
        # catch it keep working; only the garbled message text is fixed.
        raise AttributeError(
            "When returning log(TF*IDF), "
            "neither log(TF) nor log(IDF) can be applied."
        )

    if copy and not inplace:
        raise ValueError("`copy=True` cannot be used with `inplace=False`.")

    if to_layer is not None and not inplace:
        raise ValueError(f"`to_layer='{str(to_layer)}'` cannot be used with `inplace=False`.")

    if copy:
        adata = adata.copy()

    view_to_actual(adata)

    counts = adata.X if from_layer is None else adata.layers[from_layer]

    # Check before the computation
    if to_layer is not None and to_layer in adata.layers:
        warn(f"Existing layer '{str(to_layer)}' will be overwritten")

    if issparse(counts):
        n_peaks = np.asarray(counts.sum(axis=1)).reshape(-1)
        n_peaks = dia_matrix((1.0 / n_peaks, 0), shape=(n_peaks.size, n_peaks.size))
        # Left-multiplying by a diagonal matrix scales the rows while
        # keeping TF sparse. `@` is used because np.dot is not aware of
        # SciPy sparse containers.
        tf = n_peaks @ counts
    else:
        n_peaks = np.asarray(counts.sum(axis=1)).reshape(-1, 1)
        tf = counts / n_peaks

    if scale_factor is not None and scale_factor != 0 and scale_factor != 1:
        tf = tf * scale_factor
    if log_tf:
        tf = tf.log1p() if issparse(tf) else np.log1p(tf)

    idf = np.asarray(adata.shape[0] / counts.sum(axis=0)).reshape(-1)
    if log_idf:
        idf = np.log1p(idf)

    # Scale the columns with a sparse diagonal matrix in both branches.
    # Building np.diag(idf) would allocate a dense n_peaks x n_peaks array.
    idf_diag = dia_matrix((idf, 0), shape=(idf.size, idf.size))
    tf_sparse = tf if issparse(tf) else csr_matrix(tf)
    tf_idf = (tf_sparse @ idf_diag).tocsr()

    if log_tfidf:
        tf_idf = tf_idf.log1p()

    # np.nan_to_num returns a sparse matrix untouched, so NaNs (e.g. from
    # cells with zero total counts) have to be cleared on the stored values.
    res = tf_idf
    nan_mask = np.isnan(res.data)
    if nan_mask.any():
        res.data[nan_mask] = 0.0
        res.eliminate_zeros()

    if not inplace:
        return res

    if to_layer is not None:
        adata.layers[to_layer] = res
    else:
        adata.X = res

    if copy:
        return adata
129
+
130
+
131
def binarize(data: AnnData, threshold: np.float32=0):
    """
    Transform peak counts to the binary matrix, in place.

    Values strictly greater than ``threshold`` become 1 and every other
    stored value becomes 0, so the result contains only 0 and 1.

    Parameters
    ----------
    data
        AnnData object with peak counts. ``data.X`` is modified in place.
    threshold
        Values above this cut-off are set to 1 (0 by default, i.e. all
        positive counts become 1).

    Raises
    ------
    TypeError
        If ``data`` is not an AnnData object.
    """
    if not isinstance(data, AnnData):
        raise TypeError("Expected AnnData object")
    adata = data

    X = adata.X
    if issparse(X):
        # Only explicitly stored entries can be inspected; implicit zeros
        # stay zero.
        above = X.data > threshold
        X.data[above] = 1
        # Entries at or below the threshold must be cleared as well,
        # otherwise the matrix is not binary.
        X.data[~above] = 0
        X.eliminate_zeros()
    else:
        above = X > threshold
        X[above] = 1
        X[~above] = 0
150
+
151
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: SURE-tools
3
- Version: 1.0.7
3
+ Version: 1.0.9
4
4
  Summary: Succinct Representation of Single Cells
5
5
  Home-page: https://github.com/ZengFLab/SURE
6
6
  Author: Feng Zeng
@@ -1,17 +1,19 @@
1
- SURE/SURE.py,sha256=3k28h-IMb2poRjoM5yq_u7FWO1Z2ixr_k6o05fF9eEE,48333
2
- SURE/__init__.py,sha256=SbIRwAVBnNhza9vbsUH4N04atr0q_Abp04pCUTBhNio,127
1
+ SURE/SURE.py,sha256=mEvzoIvjZUSO0UYSrb-CyGJtkFEweT92_XMQor_lsk0,48381
2
+ SURE/__init__.py,sha256=UyoyPqpGllsoVLMAO8gX0U6RJUMla6rX86UoAueiKEc,154
3
3
  SURE/assembly/__init__.py,sha256=jxZLURXKPzXe21LhrZ09LgZr33iqdjlQy4oSEj5gR2Q,172
4
4
  SURE/assembly/assembly.py,sha256=6IMdelPOiRO4mUb4dC7gVCoF1Uvfw86-Map8P_jnUag,21477
5
5
  SURE/assembly/atlas.py,sha256=ALjmVWutm_tOHTcT1aqOxmuCEQw-XzrtDoMCV_8oXLk,21794
6
+ SURE/atac/__init__.py,sha256=3smP8IKHfwNCd1G_sZH3pKHXuLkLpFuLtjUTUSy7_As,34
7
+ SURE/atac/utils.py,sha256=m4NYwpy9O5T1pXTzgCOCcmlwrC6GTi-cQ5sm2wZu2O8,4354
6
8
  SURE/codebook/__init__.py,sha256=2T5gjp8JIaBayrXAnOJYSebQHsWprOs87difpR1OPNw,243
7
9
  SURE/codebook/codebook.py,sha256=ZlN6gRX9Gj2D2u3P5KeOsbZri0MoMAiJo9lNeL-MK-I,17117
8
10
  SURE/utils/__init__.py,sha256=Htqv4KqVKcRiaaTBsR-6yZ4LSlbhbzutjNKXGD9-uds,660
9
11
  SURE/utils/custom_mlp.py,sha256=07TYX1HgxfEjb_3i5MpiZfNhOhx3dKntuwGkrpteWiM,7036
10
12
  SURE/utils/queue.py,sha256=E_5PA5EWcBoGAZj8BkKQnkCK0p4C-4-xcTPqdIXaPXU,1892
11
13
  SURE/utils/utils.py,sha256=IUHjDDtYaAYllCWsZyIzqQwaLul6fJRvHRH4vIYcR-c,8462
12
- sure_tools-1.0.7.dist-info/licenses/LICENSE,sha256=TFHKwmrAViXQbSX5W-NDItkWFjm45HWOeUniDrqmnu0,1065
13
- sure_tools-1.0.7.dist-info/METADATA,sha256=jS2KOJhrVk3boa1Yws4cQyIQykMCk9LaCkozyIn5Uj8,2650
14
- sure_tools-1.0.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
15
- sure_tools-1.0.7.dist-info/entry_points.txt,sha256=u12payZYgCBy5FCwRHP6AlSQhKCiWSEDwj68r1DVdn8,40
16
- sure_tools-1.0.7.dist-info/top_level.txt,sha256=BtFTebdiJeqra4r6mm-uEtwVRFLZ_IjYsQ7OnalrOvY,5
17
- sure_tools-1.0.7.dist-info/RECORD,,
14
+ sure_tools-1.0.9.dist-info/licenses/LICENSE,sha256=TFHKwmrAViXQbSX5W-NDItkWFjm45HWOeUniDrqmnu0,1065
15
+ sure_tools-1.0.9.dist-info/METADATA,sha256=BrvN2nIVYq-z39MdOb3nGNnpYbPifOHOmDBJF2zphVo,2650
16
+ sure_tools-1.0.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
17
+ sure_tools-1.0.9.dist-info/entry_points.txt,sha256=u12payZYgCBy5FCwRHP6AlSQhKCiWSEDwj68r1DVdn8,40
18
+ sure_tools-1.0.9.dist-info/top_level.txt,sha256=BtFTebdiJeqra4r6mm-uEtwVRFLZ_IjYsQ7OnalrOvY,5
19
+ sure_tools-1.0.9.dist-info/RECORD,,