PyPI - SURE-tools - Versions diffs - 1.0.1__py3-none-any.whl - Mend

SURE-tools 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of SURE-tools might be problematic. Click here for more details.

Files changed (17) hide show

SURE/SURE.py +1203 -0
SURE/__init__.py +7 -0
SURE/assembly/__init__.py +3 -0
SURE/assembly/assembly.py +511 -0
SURE/assembly/atlas.py +575 -0
SURE/codebook/__init__.py +4 -0
SURE/codebook/codebook.py +472 -0
SURE/utils/__init__.py +19 -0
SURE/utils/custom_mlp.py +209 -0
SURE/utils/queue.py +50 -0
SURE/utils/utils.py +308 -0
SURE_tools-1.0.1.dist-info/LICENSE +21 -0
SURE_tools-1.0.1.dist-info/METADATA +68 -0
SURE_tools-1.0.1.dist-info/RECORD +17 -0
SURE_tools-1.0.1.dist-info/WHEEL +5 -0
SURE_tools-1.0.1.dist-info/entry_points.txt +2 -0
SURE_tools-1.0.1.dist-info/top_level.txt +1 -0

SURE/utils/queue.py ADDED Viewed

@@ -0,0 +1,50 @@
+import heapq
+class PriorityQueue:
+    def __init__(self):
+        self.elements = []
+        self.entry_finder = {}  # mapping of tasks to entries
+        self.REMOVED = '<removed-task>'  # placeholder for a removed task
+        self.counter = 0  # unique sequence count
+    def is_empty(self):
+        return not self.elements
+    def put(self, item, priority):
+        if item in self.entry_finder:
+            self.remove(item)  # Remove the existing entry if it exists
+        entry = [priority, self.counter, item]  # New entry with priority, counter, and item
+        self.entry_finder[item] = entry
+        heapq.heappush(self.elements, entry)
+        self.counter += 1
+    def remove(self, item):
+        # Mark an existing task as REMOVED
+        entry = self.entry_finder.pop(item)
+        entry[-1] = self.REMOVED  # Mark it as removed
+    def get(self):
+        while self.elements:
+            priority, count, item = heapq.heappop(self.elements)
+            if item is not self.REMOVED:
+                del self.entry_finder[item]  # Remove from the entry finder
+                return item
+        raise KeyError('pop from an empty priority queue')
+    def peek(self):
+        while self.elements:
+            priority, count, item = self.elements[0]
+            if item is not self.REMOVED:
+                return item
+            heapq.heappop(self.elements)  # Remove stale entry
+        return None
+    def update(self, item, priority):
+        self.put(item, priority)  # Use put to add or update the item
+    def __iter__(self):
+        # Iterate over the priority queue without popping items
+        # This will create a shallow copy of the current elements
+        temp_elements = [(priority, count, item) for priority, count, item in self.elements if item is not self.REMOVED]
+        for priority, count, item in sorted(temp_elements):
+            yield item

SURE/utils/utils.py ADDED Viewed

@@ -0,0 +1,308 @@
+import torch
+from torch.utils.data import Dataset
+import numpy as np
+import textwrap
+class CustomDataset(Dataset):
+    def __init__(self, X):
+        self.X = X
+    def __len__(self):
+        return len(self.X)
+    def __getitem__(self, idx):
+        x = self.X[idx]
+        return x, idx
+class CustomDataset2(Dataset):
+    def __init__(self, X, U):
+        self.X = X
+        self.U = U
+    def __len__(self):
+        return len(self.X)
+    def __getitem__(self, idx):
+        x = self.X[idx]
+        if self.U is None:
+            u = x
+        else:
+            u = self.U[idx]
+        return x, u, idx
+class CustomDataset3(Dataset):
+    def __init__(self, X, U, Y):
+        self.X = X
+        self.U = U
+        self.Y = Y
+    def __len__(self):
+        return len(self.X)
+    def __getitem__(self, idx):
+        x = self.X[idx]
+        if self.U is None:
+            u = x
+        else:
+            u = self.U[idx]
+        if self.Y is None:
+            y = x
+        else:
+            y = self.Y[idx]
+        return x, u, y, idx
+class CustomDataset4(Dataset):
+    def __init__(self, X, Y, Z, U):
+        self.X = X
+        self.U = U
+        self.Y = Y
+        self.Z = Z
+    def __len__(self):
+        return len(self.X)
+    def __getitem__(self, idx):
+        x = self.X[idx]
+        if self.U is None:
+            u = x
+        else:
+            u = self.U[idx]
+        if self.Y is None:
+            y = x
+        else:
+            y = self.Y[idx]
+        if self.Z is None:
+            z = x
+        else:
+            z = self.Z[idx]
+        return x, y, z, u, idx
+class CustomMultiOmicsDataset(Dataset):
+    def __init__(self, X1, X2):
+        self.X1 = X1
+        self.X2 = X2
+    def __len__(self):
+        return len(self.X1)
+    def __getitem__(self, idx):
+        x1 = self.X1[idx]
+        x2 = self.X2[idx]
+        return x1, x2, idx
+class CustomMultiOmicsDataset2(Dataset):
+    def __init__(self, X1, X2, U):
+        self.X1 = X1
+        self.X2 = X2
+        self.U = U
+    def __len__(self):
+        return len(self.X1)
+    def __getitem__(self, idx):
+        x1 = self.X1[idx]
+        x2 = self.X2[idx]
+        if self.U is None:
+            u=x1
+        else:
+            u = self.U[idx]
+        return x1, x2, u, idx
+class CustomMultiOmicsDataset3(Dataset):
+    def __init__(self, X1, X2, Y, U):
+        self.X1 = X1
+        self.X2 = X2
+        self.U = U
+        self.Y = Y
+    def __len__(self):
+        return len(self.X1)
+    def __getitem__(self, idx):
+        x1 = self.X1[idx]
+        x2 = self.X2[idx]
+        if self.U is None:
+            u=x1
+        else:
+            u = self.U[idx]
+        if self.Y is None:
+            y=x1
+        else:
+            y=self.Y[idx]
+        return x1, x2, y, u, idx
+class CustomMultiOmicsDataset4(Dataset):
+    def __init__(self, X1, X2, Y, Z, U):
+        self.X1 = X1
+        self.X2 = X2
+        self.U = U
+        self.Y = Y
+        self.Z = Z
+    def __len__(self):
+        return len(self.X1)
+    def __getitem__(self, idx):
+        x1 = self.X1[idx]
+        x2 = self.X2[idx]
+        if self.U is None:
+            u=x1
+        else:
+            u = self.U[idx]
+        if self.Y is None:
+            y=x1
+        else:
+            y = self.Y[idx]
+        if self.Z is None:
+            z=x1
+        else:
+            z = self.Z[idx]
+        return x1, x2, y, z, u, idx
+def tensor_to_numpy(tensor):
+    """
+    Check if the tensor is on a CUDA device. If yes, detach it, move it to CPU,
+    and convert to a NumPy array. If not, just detach and convert to NumPy.
+    Args:
+        tensor (torch.Tensor): The input tensor.
+    Returns:
+        np.ndarray: The resulting NumPy array.
+    """
+    # Check if the input is a tensor
+    if not isinstance(tensor, torch.Tensor):
+        if isinstance(tensor, np.ndarray):
+            return tensor
+        raise ValueError("Input must be a torch Tensor.")
+    # Detach the tensor from the computation graph
+    tensor = tensor.detach()
+    # Check if the tensor is on CUDA
+    if tensor.is_cuda:
+        tensor = tensor.cpu()
+    # Convert to NumPy
+    numpy_array = tensor.numpy()
+    return numpy_array
+def move_to_device(data, device):
+    """
+    Checks if the input data is a tensor. If not, converts it to a tensor,
+    checks if the tensor is on the specified device, and moves it if necessary.
+    Args:
+        data (any): The input data to check (can be a tensor, list, NumPy array, etc.).
+        device (str or torch.device): The device to check against (e.g., 'cpu', 'cuda', 'cuda:0').
+    Returns:
+        torch.Tensor: The tensor on the specified device.
+    """
+    # Convert input data to tensor if it's not already a tensor
+    if not isinstance(data, torch.Tensor):
+        data = torch.tensor(data)
+    # Check if the device is a string, and convert it to torch.device if necessary
+    device = torch.device(device) if isinstance(device, str) else device
+    # Move the tensor to the specified device if necessary
+    if data.device != device:
+        data = data.to(device)
+    return data
+def convert_to_tensor(input_array, dtype=torch.float32, device=None):
+    """
+    Check if the input array is a torch tensor and convert it to a tensor if it is not.
+    If dtype is specified, convert the tensor to the specified dtype if necessary.
+    Parameters:
+    - input_array: The input array to check and convert.
+    - dtype: The desired data type for the resulting tensor (optional).
+    Returns:
+    - A torch tensor.
+    """
+    # Check if the input is already a torch tensor
+    if isinstance(input_array, torch.Tensor):
+        #print("Input is already a torch tensor.")
+        # If dtype is specified, check and convert if necessary
+        if dtype is not None and input_array.dtype != dtype:
+            #print(f"Changing tensor dtype from {input_array.dtype} to {dtype}.")
+            input_array = input_array.to(dtype)
+        if device:
+            input_array = move_to_device(input_array, device)
+        return input_array  # Return the tensor unchanged if dtype matches
+    else:
+        # Convert to torch tensor
+        #print("Input is not a torch tensor. Converting to torch tensor.")
+        tensor = torch.tensor(input_array, dtype=dtype)
+        if device:
+            tensor = move_to_device(tensor, device)
+        return tensor
+class Colors:
+    """ANSI escape sequences for coloured terminal output."""
+    # Restores the terminal's default rendition; emit it after coloured text.
+    RESET = "\033[0m"
+    # Foreground colour codes (SGR 31-37).
+    RED = "\033[31m"
+    GREEN = "\033[32m"
+    YELLOW = "\033[33m"
+    BLUE = "\033[34m"
+    MAGENTA = "\033[35m"
+    CYAN = "\033[36m"
+    WHITE = "\033[37m"
+def pretty_print(long_text, width=120, color='green'):
+    # Convert multiple spaces to a single space
+    formatted_text = ' '.join(long_text.split())
+    # Wrap the text to a specified width
+    wrapped_text = textwrap.fill(formatted_text, width=width)
+    # Define the indent for subsequent lines
+    indent = '    '  # Four spaces for indentation
+    # Split the wrapped text into lines
+    lines = wrapped_text.split('\n')
+    text_color = Colors.RESET
+    if color.lower() == 'green':
+        text_color = Colors.GREEN
+    elif color.lower == 'yellow':
+        text_color = Colors.YELLOW
+    # Print the first line without indent
+    print(text_color + lines[0] + Colors.RESET)
+    # Print the subsequent lines with indent
+    for line in lines[1:]:
+        print(indent + text_color + line + Colors.RESET)
+def find_partitions_greedy(numbers, num_groups):
+    # Step 1: Calculate the target sum per group
+    total_sum = sum(numbers)
+    target_per_group = total_sum / num_groups
+    # Initialize data structures
+    groups = [[] for _ in range(num_groups)]  # Groups of numbers
+    sums = [0] * num_groups  # Sums of each group
+    indices = [[] for _ in range(num_groups)]  # Indices of numbers in original list
+    # Step 2: Sort numbers and their indices based on value
+    sorted_numbers_with_indices = sorted(enumerate(numbers), key=lambda x: -x[1])
+    # Step 3: Distribute numbers to approach the target sum per group as close as possible
+    for index, number in sorted_numbers_with_indices:
+        # Find the group with the minimum sum
+        min_group_index = sums.index(min(sums))
+        groups[min_group_index].append(number)
+        indices[min_group_index].append(index)
+        sums[min_group_index] += number
+    # Return the groups with their original indices
+    return [(group, index_group) for group, index_group in zip(groups, indices)]

SURE_tools-1.0.1.dist-info/LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+MIT License
+Copyright (c) 2024 ZengFLab
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

SURE_tools-1.0.1.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,68 @@
+Metadata-Version: 2.1
+Name: SURE-tools
+Version: 1.0.1
+Summary: Succinct Representation of Single Cells
+Home-page: https://github.com/ZengFLab/SURE
+Author: Feng Zeng
+Author-email: zengfeng@xmu.edu.cn
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: dill==0.3.8
+Requires-Dist: scanpy
+Requires-Dist: pytorch-ignite
+Requires-Dist: datatable
+Requires-Dist: scipy
+Requires-Dist: numpy
+Requires-Dist: scikit-learn
+Requires-Dist: pandas
+Requires-Dist: pyro-ppl
+Requires-Dist: leidenalg
+Requires-Dist: python-igraph
+Requires-Dist: networkx
+Requires-Dist: matplotlib
+Requires-Dist: seaborn
+Requires-Dist: fa2-modified
+# SURE: SUccinct REpresentation of cells
+ SURE introduces a vector quantization-based probabilistic generative model that calls metacells and uses them as landmarks forming a coordinate system for cell ID. This allows single-cell omics data to be analyzed in a manner analogous to reference genome-based genomic analysis.
+## **$$\color{red}\text{\textbf{UPDATE}}$$**
+An update has been released. Users can access it via [SUREv2](https://github.com/ZengFLab/SUREv2). It provides Python classes through which users can call SURE in scripts. It also provides a command that lets users run SURE in the shell. Additionally, SUREv2 supports the calling of metacells for multi-omics datasets.
+## Installation
+1. Create a virtual environment
+```bash
+conda create -n SUREv1 python=3.10 scipy numpy pandas scikit-learn && conda activate SUREv1
+```
+2. Install [PyTorch](https://pytorch.org/get-started/locally/) following the official instructions.
+```bash
+pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu126
+```
+3. Install SURE
+```bash
+pip3 install SURE-tools
+```
+## Example 1: Calling metacells for a single-cell dataset
+Users can refer to [here](https://github.com/ZengFLab/SURE_example_1) for details.
+## Example 2: The hierarchical assembly of large-scale dataset(s)
+Users can refer to [here](https://github.com/ZengFLab/SURE_example_2) for details.
+## Example 3: Human brain cell atlas
+Users can refer to [here](https://github.com/ZengFLab/SURE_example_3) for details.
+## Example 4: Metacell calling for scATAC-seq data
+Users can refer to [here](https://github.com/ZengFLab/SURE_example_4) for details.

SURE_tools-1.0.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,17 @@
+SURE/SURE.py,sha256=RTzWrKwIuMj6jzivdf8MlcIXbyfIcBpJ_gjoKWXVQAs,46960
+SURE/__init__.py,sha256=SbIRwAVBnNhza9vbsUH4N04atr0q_Abp04pCUTBhNio,127
+SURE/assembly/__init__.py,sha256=jxZLURXKPzXe21LhrZ09LgZr33iqdjlQy4oSEj5gR2Q,172
+SURE/assembly/assembly.py,sha256=6IMdelPOiRO4mUb4dC7gVCoF1Uvfw86-Map8P_jnUag,21477
+SURE/assembly/atlas.py,sha256=ALjmVWutm_tOHTcT1aqOxmuCEQw-XzrtDoMCV_8oXLk,21794
+SURE/codebook/__init__.py,sha256=2T5gjp8JIaBayrXAnOJYSebQHsWprOs87difpR1OPNw,243
+SURE/codebook/codebook.py,sha256=ZlN6gRX9Gj2D2u3P5KeOsbZri0MoMAiJo9lNeL-MK-I,17117
+SURE/utils/__init__.py,sha256=Htqv4KqVKcRiaaTBsR-6yZ4LSlbhbzutjNKXGD9-uds,660
+SURE/utils/custom_mlp.py,sha256=07TYX1HgxfEjb_3i5MpiZfNhOhx3dKntuwGkrpteWiM,7036
+SURE/utils/queue.py,sha256=E_5PA5EWcBoGAZj8BkKQnkCK0p4C-4-xcTPqdIXaPXU,1892
+SURE/utils/utils.py,sha256=IUHjDDtYaAYllCWsZyIzqQwaLul6fJRvHRH4vIYcR-c,8462
+SURE_tools-1.0.1.dist-info/LICENSE,sha256=TFHKwmrAViXQbSX5W-NDItkWFjm45HWOeUniDrqmnu0,1065
+SURE_tools-1.0.1.dist-info/METADATA,sha256=FnSfFi7Gu_fZlX1TeWdub59B44yxCSv_UcShk2_RfiU,2431
+SURE_tools-1.0.1.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+SURE_tools-1.0.1.dist-info/entry_points.txt,sha256=u12payZYgCBy5FCwRHP6AlSQhKCiWSEDwj68r1DVdn8,40
+SURE_tools-1.0.1.dist-info/top_level.txt,sha256=BtFTebdiJeqra4r6mm-uEtwVRFLZ_IjYsQ7OnalrOvY,5
+SURE_tools-1.0.1.dist-info/RECORD,,

SURE_tools-1.0.1.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (75.1.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

SURE_tools-1.0.1.dist-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ [console_scripts]
2	+ SURE = SURE.SURE:main

SURE_tools-1.0.1.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ SURE