SURE-tools 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of SURE-tools might be problematic. Click here for more details.

SURE/utils/queue.py ADDED
@@ -0,0 +1,50 @@
1
+ import heapq
2
+
3
class PriorityQueue:
    """Min-priority queue with updatable priorities, built on ``heapq``.

    Entries are stored in a heap as ``[priority, counter, item]`` lists. The
    monotonically increasing counter breaks priority ties in insertion order,
    so the items themselves are never compared. Removal is lazy: the heap
    entry is overwritten with the ``REMOVED`` placeholder and discarded when
    it later reaches the top of the heap.

    Items must be hashable because they are used as dictionary keys.
    """

    def __init__(self):
        self.elements = []  # heap of [priority, counter, item] entries
        self.entry_finder = {}  # mapping of live items to their heap entries
        self.REMOVED = '<removed-task>'  # placeholder for a removed task
        self.counter = 0  # unique sequence count

    def is_empty(self):
        """Return True when the queue holds no live items.

        The heap may still contain stale (lazily removed) entries, so the
        check is made against ``entry_finder``, which tracks live items only.
        """
        return not self.entry_finder

    def put(self, item, priority):
        """Insert ``item`` with ``priority``, replacing any existing entry for it."""
        if item in self.entry_finder:
            self.remove(item)  # invalidate the old entry before re-adding
        entry = [priority, self.counter, item]
        self.entry_finder[item] = entry
        heapq.heappush(self.elements, entry)
        self.counter += 1

    def remove(self, item):
        """Mark the entry for ``item`` as removed.

        Raises:
            KeyError: If ``item`` is not currently in the queue.
        """
        entry = self.entry_finder.pop(item)
        entry[-1] = self.REMOVED  # the heap entry is dropped lazily later

    def get(self):
        """Pop and return the live item with the lowest priority.

        Raises:
            KeyError: If the queue contains no live items.
        """
        while self.elements:
            item = heapq.heappop(self.elements)[-1]
            if item is not self.REMOVED:
                del self.entry_finder[item]
                return item
        raise KeyError('pop from an empty priority queue')

    def peek(self):
        """Return the lowest-priority live item without removing it.

        Stale entries found at the top of the heap are discarded along the
        way. Returns None when no live item remains.
        """
        while self.elements:
            item = self.elements[0][-1]
            if item is not self.REMOVED:
                return item
            heapq.heappop(self.elements)  # drop the stale entry
        return None

    def update(self, item, priority):
        """Add ``item`` or change its priority; equivalent to ``put``."""
        self.put(item, priority)

    def __iter__(self):
        """Yield live items in priority order without modifying the queue."""
        live_entries = [
            (priority, count, item)
            for priority, count, item in self.elements
            if item is not self.REMOVED
        ]
        # The counter is unique, so sorting never has to compare the items.
        for _, _, item in sorted(live_entries):
            yield item
SURE/utils/utils.py ADDED
@@ -0,0 +1,308 @@
1
+ import torch
2
+ from torch.utils.data import Dataset
3
+
4
+ import numpy as np
5
+ import textwrap
6
+
7
class CustomDataset(Dataset):
    """Dataset over one indexable collection.

    Each item is the pair ``(X[idx], idx)``, so the caller also receives the
    index that was used for the lookup.
    """

    def __init__(self, X):
        self.X = X

    def __len__(self):
        # The length is whatever len() reports for X.
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], idx
17
+
18
class CustomDataset2(Dataset):
    """Dataset pairing ``X`` with an optional companion collection ``U``.

    Each item is ``(x, u, idx)``. When ``U`` is None the ``x`` value is
    returned in the ``u`` slot as well.
    """

    def __init__(self, X, U):
        self.X = X
        self.U = U

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        # Fall back to the sample itself when no U collection was supplied.
        companion = sample if self.U is None else self.U[idx]
        return sample, companion, idx
33
+
34
+
35
class CustomDataset3(Dataset):
    """Dataset pairing ``X`` with optional collections ``U`` and ``Y``.

    Each item is ``(x, u, y, idx)``. Any of ``U`` / ``Y`` that is None is
    replaced by the ``x`` value in the corresponding slot.
    """

    def __init__(self, X, U, Y):
        self.X = X
        self.U = U
        self.Y = Y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        # Missing collections fall back to the sample itself.
        u_value = sample if self.U is None else self.U[idx]
        y_value = sample if self.Y is None else self.Y[idx]
        return sample, u_value, y_value, idx
55
+
56
class CustomDataset4(Dataset):
    """Dataset pairing ``X`` with optional collections ``Y``, ``Z`` and ``U``.

    Each item is ``(x, y, z, u, idx)``. Any optional collection that is None
    is replaced by the ``x`` value in the corresponding slot.
    """

    def __init__(self, X, Y, Z, U):
        self.X = X
        self.U = U
        self.Y = Y
        self.Z = Z

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        sample = self.X[idx]
        # Missing collections fall back to the sample itself.
        u_value = sample if self.U is None else self.U[idx]
        y_value = sample if self.Y is None else self.Y[idx]
        z_value = sample if self.Z is None else self.Z[idx]
        return sample, y_value, z_value, u_value, idx
81
+
82
class CustomMultiOmicsDataset(Dataset):
    """Dataset over two collections indexed in lockstep.

    Each item is ``(X1[idx], X2[idx], idx)``. The reported length is that of
    ``X1``.
    """

    def __init__(self, X1, X2):
        self.X1 = X1
        self.X2 = X2

    def __len__(self):
        return len(self.X1)

    def __getitem__(self, idx):
        first = self.X1[idx]
        second = self.X2[idx]
        return first, second, idx
94
+
95
class CustomMultiOmicsDataset2(Dataset):
    """Two lockstep collections plus an optional collection ``U``.

    Each item is ``(x1, x2, u, idx)``. When ``U`` is None the ``x1`` value is
    returned in the ``u`` slot. The reported length is that of ``X1``.
    """

    def __init__(self, X1, X2, U):
        self.X1 = X1
        self.X2 = X2
        self.U = U

    def __len__(self):
        return len(self.X1)

    def __getitem__(self, idx):
        first = self.X1[idx]
        second = self.X2[idx]
        # Fall back to the first collection's sample when U is absent.
        u_value = first if self.U is None else self.U[idx]
        return first, second, u_value, idx
112
+
113
class CustomMultiOmicsDataset3(Dataset):
    """Two lockstep collections plus optional collections ``Y`` and ``U``.

    Each item is ``(x1, x2, y, u, idx)``. Any optional collection that is
    None is replaced by the ``x1`` value in the corresponding slot. The
    reported length is that of ``X1``.
    """

    def __init__(self, X1, X2, Y, U):
        self.X1 = X1
        self.X2 = X2
        self.U = U
        self.Y = Y

    def __len__(self):
        return len(self.X1)

    def __getitem__(self, idx):
        first = self.X1[idx]
        second = self.X2[idx]
        # Missing collections fall back to the first collection's sample.
        u_value = first if self.U is None else self.U[idx]
        y_value = first if self.Y is None else self.Y[idx]
        return first, second, y_value, u_value, idx
135
+
136
class CustomMultiOmicsDataset4(Dataset):
    """Two lockstep collections plus optional ``Y``, ``Z`` and ``U``.

    Each item is ``(x1, x2, y, z, u, idx)``. Any optional collection that is
    None is replaced by the ``x1`` value in the corresponding slot. The
    reported length is that of ``X1``.
    """

    def __init__(self, X1, X2, Y, Z, U):
        self.X1 = X1
        self.X2 = X2
        self.U = U
        self.Y = Y
        self.Z = Z

    def __len__(self):
        return len(self.X1)

    def __getitem__(self, idx):
        first = self.X1[idx]
        second = self.X2[idx]
        # Missing collections fall back to the first collection's sample.
        u_value = first if self.U is None else self.U[idx]
        y_value = first if self.Y is None else self.Y[idx]
        z_value = first if self.Z is None else self.Z[idx]
        return first, second, y_value, z_value, u_value, idx
163
+
164
def tensor_to_numpy(tensor):
    """
    Convert a torch tensor to a NumPy array.

    The tensor is detached from the computation graph and copied to host
    memory when it lives on another device. NumPy arrays are passed through
    unchanged.

    Args:
        tensor (torch.Tensor or np.ndarray): The input tensor or array.

    Returns:
        np.ndarray: The resulting NumPy array.

    Raises:
        ValueError: If the input is neither a torch tensor nor a NumPy array.
    """
    if isinstance(tensor, np.ndarray):
        return tensor
    if not isinstance(tensor, torch.Tensor):
        raise ValueError("Input must be a torch Tensor.")

    # .cpu() returns the tensor itself when it is already on the CPU, so it
    # can be applied unconditionally; this also covers non-CUDA accelerators,
    # whose tensors cannot be converted by .numpy() directly.
    return tensor.detach().cpu().numpy()
191
+
192
def move_to_device(data, device):
    """
    Return ``data`` as a tensor located on ``device``.

    Non-tensor input is first converted with ``torch.tensor``. A string
    ``device`` is turned into a ``torch.device``. The tensor is transferred
    only when its current device differs from the requested one.

    Args:
        data (any): A tensor, or anything ``torch.tensor`` accepts.
        device (str or torch.device): Target device (e.g., 'cpu', 'cuda', 'cuda:0').

    Returns:
        torch.Tensor: The tensor on the specified device.
    """
    as_tensor = data if isinstance(data, torch.Tensor) else torch.tensor(data)
    target = torch.device(device) if isinstance(device, str) else device

    # Already in place: hand back the same tensor object.
    if as_tensor.device == target:
        return as_tensor
    return as_tensor.to(target)
216
+
217
+
218
def convert_to_tensor(input_array, dtype=torch.float32, device=None):
    """
    Return ``input_array`` as a torch tensor with the requested dtype and device.

    Parameters:
    - input_array: A torch tensor, or data accepted by ``torch.tensor``.
    - dtype: The desired data type. For tensor input, None leaves the dtype
      untouched; for other input it is forwarded to ``torch.tensor``.
    - device: Optional target device. When truthy, the result is passed
      through ``move_to_device``; otherwise no device handling happens.

    Returns:
    - A torch tensor. Tensor input that already has the requested dtype is
      returned as the same object when no device is given.
    """
    if not isinstance(input_array, torch.Tensor):
        # Build a new tensor from the raw data.
        result = torch.tensor(input_array, dtype=dtype)
    elif dtype is None or input_array.dtype == dtype:
        # Nothing to cast.
        result = input_array
    else:
        result = input_array.to(dtype)

    if device:
        result = move_to_device(result, device)
    return result
248
+
249
+
250
class Colors:
    """ANSI escape sequences for terminal text colors."""

    RESET = "\033[0m"
    RED = "\033[31m"
    GREEN = "\033[32m"
    YELLOW = "\033[33m"
    BLUE = "\033[34m"
    MAGENTA = "\033[35m"
    CYAN = "\033[36m"
    WHITE = "\033[37m"


def pretty_print(long_text, width=120, color='green'):
    """
    Print text to stdout, whitespace-normalized, wrapped and colorized.

    Runs of whitespace are collapsed to single spaces, the text is wrapped
    to ``width`` columns, and every line after the first is indented by four
    spaces. Each line is wrapped in the ANSI color code and a reset code.

    Args:
        long_text (str): The text to print.
        width (int): Wrap width passed to ``textwrap.fill``.
        color (str): Case-insensitive name of a constant on ``Colors``
            (e.g. 'green', 'yellow'). Unknown names use ``Colors.RESET``.
    """
    # Convert multiple spaces to a single space
    formatted_text = ' '.join(long_text.split())

    # Wrap the text and split it back into individual lines
    lines = textwrap.fill(formatted_text, width=width).split('\n')

    # Indent for subsequent lines (four spaces)
    indent = '    '

    # Resolve the color by name. The upper-case filter skips dunder
    # attributes, leaving only the color constants declared on Colors.
    palette = {
        name.lower(): code
        for name, code in vars(Colors).items()
        if name.isupper()
    }
    text_color = palette.get(color.lower(), Colors.RESET)

    # Print the first line without indent
    print(text_color + lines[0] + Colors.RESET)

    # Print the subsequent lines with indent
    for line in lines[1:]:
        print(indent + text_color + line + Colors.RESET)
285
+
286
def find_partitions_greedy(numbers, num_groups):
    """
    Split numbers into groups with roughly equal sums using a greedy pass.

    The numbers are visited from largest to smallest (ties keep their
    original order) and each one is placed in the group whose running sum is
    currently the smallest; among equal sums the lowest-numbered group wins.

    Args:
        numbers: Iterable of numeric values to distribute.
        num_groups (int): Number of groups to create; must be at least 1.

    Returns:
        list[tuple[list, list]]: One ``(values, indices)`` pair per group,
        where ``indices`` are the positions of the values in ``numbers``.

    Raises:
        ValueError: If ``num_groups`` is less than 1.
    """
    if num_groups < 1:
        raise ValueError("num_groups must be at least 1")

    groups = [[] for _ in range(num_groups)]  # values assigned to each group
    indices = [[] for _ in range(num_groups)]  # original positions of those values
    sums = [0] * num_groups  # running sum of each group

    # Largest values first, so the big items are spread out before the small
    # ones fill the gaps.
    by_value_desc = sorted(enumerate(numbers), key=lambda pair: -pair[1])

    for index, number in by_value_desc:
        # Pick the group with the smallest running sum.
        target = sums.index(min(sums))
        groups[target].append(number)
        indices[target].append(index)
        sums[target] += number

    return list(zip(groups, indices))
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 ZengFLab
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,68 @@
1
+ Metadata-Version: 2.1
2
+ Name: SURE-tools
3
+ Version: 1.0.1
4
+ Summary: Succinct Representation of Single Cells
5
+ Home-page: https://github.com/ZengFLab/SURE
6
+ Author: Feng Zeng
7
+ Author-email: zengfeng@xmu.edu.cn
8
+ Classifier: Programming Language :: Python :: 3
9
+ Classifier: License :: OSI Approved :: MIT License
10
+ Classifier: Operating System :: OS Independent
11
+ Requires-Python: >=3.10
12
+ Description-Content-Type: text/markdown
13
+ License-File: LICENSE
14
+ Requires-Dist: dill==0.3.8
15
+ Requires-Dist: scanpy
16
+ Requires-Dist: pytorch-ignite
17
+ Requires-Dist: datatable
18
+ Requires-Dist: scipy
19
+ Requires-Dist: numpy
20
+ Requires-Dist: scikit-learn
21
+ Requires-Dist: pandas
22
+ Requires-Dist: pyro-ppl
23
+ Requires-Dist: leidenalg
24
+ Requires-Dist: python-igraph
25
+ Requires-Dist: networkx
26
+ Requires-Dist: matplotlib
27
+ Requires-Dist: seaborn
28
+ Requires-Dist: fa2-modified
29
+
30
+ # SURE: SUccinct REpresentation of cells
31
+ SURE introduces a vector quantization-based probabilistic generative model for calling metacells and using them as landmarks that form a coordinate system for cell ID. This allows single-cell omics data to be analyzed in a manner analogous to reference genome-based genomic analysis.
32
+
33
+ ## **$$\color{red}\text{\textbf{UPDATE}}$$**
34
+ An update has been released. Users can access it via [SUREv2](https://github.com/ZengFLab/SUREv2). It provides Python classes that let users call SURE from scripts, and it also provides a command for running SURE from the shell. Additionally, SUREv2 supports calling metacells for multi-omics datasets.
35
+
36
+ ## Installation
37
+ 1. Create a virtual environment
38
+ ```bash
39
+ conda create -n SUREv1 python=3.10 scipy numpy pandas scikit-learn && conda activate SUREv1
40
+ ```
41
+
42
+ 2. Install [PyTorch](https://pytorch.org/get-started/locally/) following the official instructions.
43
+ ```bash
44
+ pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu126
45
+ ```
46
+
47
+ 3. Install SURE
48
+ ```bash
49
+ pip3 install SURE-tools
50
+ ```
51
+
52
+ ## Example 1: Calling metacells for a single-cell dataset
53
+
54
+ Users can refer to [here](https://github.com/ZengFLab/SURE_example_1) for details.
55
+
56
+ ## Example 2: The hierarchical assembly of large-scale dataset(s)
57
+
58
+ Users can refer to [here](https://github.com/ZengFLab/SURE_example_2) for details.
59
+
60
+ ## Example 3: Human brain cell atlas
61
+
62
+ Users can refer to [here](https://github.com/ZengFLab/SURE_example_3) for details.
63
+
64
+ ## Example 4: Metacell calling for scATAC-seq data
65
+
66
+ Users can refer to [here](https://github.com/ZengFLab/SURE_example_4) for details.
67
+
68
+
@@ -0,0 +1,17 @@
1
+ SURE/SURE.py,sha256=RTzWrKwIuMj6jzivdf8MlcIXbyfIcBpJ_gjoKWXVQAs,46960
2
+ SURE/__init__.py,sha256=SbIRwAVBnNhza9vbsUH4N04atr0q_Abp04pCUTBhNio,127
3
+ SURE/assembly/__init__.py,sha256=jxZLURXKPzXe21LhrZ09LgZr33iqdjlQy4oSEj5gR2Q,172
4
+ SURE/assembly/assembly.py,sha256=6IMdelPOiRO4mUb4dC7gVCoF1Uvfw86-Map8P_jnUag,21477
5
+ SURE/assembly/atlas.py,sha256=ALjmVWutm_tOHTcT1aqOxmuCEQw-XzrtDoMCV_8oXLk,21794
6
+ SURE/codebook/__init__.py,sha256=2T5gjp8JIaBayrXAnOJYSebQHsWprOs87difpR1OPNw,243
7
+ SURE/codebook/codebook.py,sha256=ZlN6gRX9Gj2D2u3P5KeOsbZri0MoMAiJo9lNeL-MK-I,17117
8
+ SURE/utils/__init__.py,sha256=Htqv4KqVKcRiaaTBsR-6yZ4LSlbhbzutjNKXGD9-uds,660
9
+ SURE/utils/custom_mlp.py,sha256=07TYX1HgxfEjb_3i5MpiZfNhOhx3dKntuwGkrpteWiM,7036
10
+ SURE/utils/queue.py,sha256=E_5PA5EWcBoGAZj8BkKQnkCK0p4C-4-xcTPqdIXaPXU,1892
11
+ SURE/utils/utils.py,sha256=IUHjDDtYaAYllCWsZyIzqQwaLul6fJRvHRH4vIYcR-c,8462
12
+ SURE_tools-1.0.1.dist-info/LICENSE,sha256=TFHKwmrAViXQbSX5W-NDItkWFjm45HWOeUniDrqmnu0,1065
13
+ SURE_tools-1.0.1.dist-info/METADATA,sha256=FnSfFi7Gu_fZlX1TeWdub59B44yxCSv_UcShk2_RfiU,2431
14
+ SURE_tools-1.0.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
15
+ SURE_tools-1.0.1.dist-info/entry_points.txt,sha256=u12payZYgCBy5FCwRHP6AlSQhKCiWSEDwj68r1DVdn8,40
16
+ SURE_tools-1.0.1.dist-info/top_level.txt,sha256=BtFTebdiJeqra4r6mm-uEtwVRFLZ_IjYsQ7OnalrOvY,5
17
+ SURE_tools-1.0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (75.1.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ SURE = SURE.SURE:main
@@ -0,0 +1 @@
1
+ SURE