PyPI - sciv - Versions diffs - 0.0.82__py3-none-any.whl → 0.0.84__py3-none-any.whl - Mend

sciv 0.0.82__py3-none-any.whl → 0.0.84__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

sciv/tool/_algorithm_.py CHANGED Viewed

@@ -14,7 +14,6 @@ from pandas import DataFrame
 from ._matrix_ import (
     matrix_dot_block_storage,
-    matrix_operation_memory_efficient,
     vector_multiply_block_storage
 )

sciv/tool/_random_walk_.py CHANGED Viewed

@@ -8,6 +8,7 @@ import torch.nn as nn
 from torch import Tensor
 from tqdm import tqdm
+from joblib import Parallel, delayed
 import numpy as np
 from anndata import AnnData
@@ -23,14 +24,15 @@ from ..util import (
     collection,
     check_adata_get,
     enrichment_optional,
-    check_gpu_availability
+    check_gpu_availability,
+    sparse_data
 )
 __name__: str = "tool_random_walk"
 def _random_walk_cpu_(
-    seed_cell_vector: collection,
+    seed_cell_vector: Union[list, np.ndarray, np.matrix],
     weight: matrix_data = None,
     gamma: float = 0.05,
     epsilon: float = 1e-5,
@@ -46,17 +48,19 @@ def _random_walk_cpu_(
     :return: The value after random walk.
     """
-    w = to_dense(weight)
     # Random walk
-    p0 = seed_cell_vector.copy()[:, np.newaxis]
+    p0 = np.asarray(seed_cell_vector, dtype=float).ravel()[:, np.newaxis]
     pt: matrix_data = p0.copy()
     k = 0
     delta = 1
     # iteration
     while delta > epsilon:
-        p1 = (1 - gamma) * np.dot(w, pt) + gamma * p0
+        if hasattr(weight, "dot"):
+            p1 = (1 - gamma) * weight.dot(pt) + gamma * p0
+        else:
+            p1 = (1 - gamma) * np.dot(weight, pt) + gamma * p0
         # 1 and 2, It would be faster alone
         if p == 1:
@@ -297,7 +301,6 @@ class RandomWalk:
         init_status.obs["clusters"] = init_status.obs["clusters"].astype(str)
-        self.cc_adata = cc_adata
         self.epsilon = epsilon
         self.gamma = gamma
         self.enrichment_gamma = enrichment_gamma
@@ -390,10 +393,12 @@ class RandomWalk:
             self.random_seed_cell = np.zeros(init_status.shape)
         # Transition Probability Matrix
-        self.weight = self._get_weight_(self.cc_adata.X)
+        self.weight = self._get_weight_(cc_adata.X)
         if not is_simple and self.is_ablation:
-            self.weight_m_knn = self._get_weight_(self.cc_adata.layers["cell_mutual_knn"])
+            self.weight_m_knn = self._get_weight_(cc_adata.layers["cell_mutual_knn"])
+        del cc_adata
         self.cluster_types, self.init_seed_cell_size = self._get_cluster_info_()
@@ -419,6 +424,9 @@ class RandomWalk:
                 self.seed_cell_weight_en_ncw
             ) = self._get_seed_cell_(init_data=init_status_no_weight, info="ablation")
+        del self.cell_affinity
+        del init_status
     def _random_walk_(
         self,
         seed_cell_data: matrix_data,
@@ -461,7 +469,7 @@ class RandomWalk:
         return self._random_walk_(seed_cell_data, weight, self.gamma)
     @staticmethod
-    def _get_weight_(cell_cell_matrix: matrix_data) -> matrix_data:
+    def _get_weight_(cell_cell_matrix: matrix_data) -> sparse_data:
         """
         Obtain weights in random walk
         :param cell_cell_matrix: Cell to cell connectivity matrix
@@ -472,7 +480,7 @@ class RandomWalk:
         data_weight = to_dense(cell_cell_matrix, is_array=True)
         cell_sum_weight = data_weight.sum(axis=1)[:, np.newaxis]
         cell_sum_weight[cell_sum_weight == 0] = 1
-        return data_weight / cell_sum_weight
+        return to_sparse(data_weight / cell_sum_weight)
     def _get_cell_weight_(self, seed_cell_size: int) -> matrix_data:
         _cell_cell_knn_: matrix_data = self.cell_affinity.copy()
@@ -592,72 +600,117 @@ class RandomWalk:
         if init_data is None:
             init_data = self.init_status
-        # seed cell threshold
-        seed_cell_count: collection = np.zeros(len(self.trait_list)).astype(int)
-        seed_cell_threshold: collection = np.zeros(len(self.trait_list))
-        seed_cell_weight: matrix_data = np.zeros(self.trs_adata.shape)
-        seed_cell_index: matrix_data = np.zeros(self.trs_adata.shape)
-        seed_cell_weight_en: matrix_data = np.zeros(self.trs_adata.shape)
+        n_traits = len(self.trait_list)
+        n_cells = self.cell_size
+        seed_cell_count = np.zeros(n_traits, dtype=int)
+        seed_cell_threshold = np.zeros(n_traits)
+        seed_cell_weight = np.zeros((n_cells, n_traits))
+        seed_cell_index = np.zeros((n_cells, n_traits), dtype=int)
+        seed_cell_weight_en = np.zeros((n_cells, n_traits))
         if not self.is_simple:
-            seed_cell_matrix: matrix_data = np.zeros(self.trs_adata.shape)
-            seed_cell_matrix_en: matrix_data = np.zeros(self.trs_adata.shape)
+            seed_cell_matrix = np.zeros((n_cells, n_traits))
+            seed_cell_matrix_en = np.zeros((n_cells, n_traits))
         else:
-            seed_cell_matrix: matrix_data = np.zeros((1, 1))
-            seed_cell_matrix_en: matrix_data = np.zeros((1, 1))
+            seed_cell_matrix = np.zeros((1, 1))
+            seed_cell_matrix_en = np.zeros((1, 1))
+        ul.log(__name__).info(f"Calculate {n_traits} traits/diseases for seed cells information.{f' ({info})' if info else ''}")
+        trait_values_all = to_dense(init_data.X, is_array=True)
+        def _process_single_trait(i: int) -> dict:
+            trait_value = trait_values_all[:, i]
+            trait_value_max = trait_value.max()
+            trait_value_min = trait_value.min()
+            if trait_value_min == trait_value_max:
+                return dict(
+                    seed_cell_count=0,
+                    seed_cell_threshold=0.0,
+                    seed_cell_index=None,
+                    seed_cell_weight=None,
+                    seed_cell_en_index=None,
+                    seed_cell_en_weight=None,
+                    seed_cell_matrix=None,
+                    seed_cell_matrix_en=None
+                )
-        ul.log(__name__).info(f"Calculate {len(self.trait_list)} traits/diseases for seed cells information.{f' ({info})' if info is not None else ''}")
-        for i in tqdm(self.trait_range):
+            # 直接获取降序索引
+            trait_value_sort_index = np.argpartition(trait_value, -trait_value.size)[::-1]
+            # 计算 >0 的细胞数
+            _gt0_cell_size = (trait_value > 0).sum()
-            # Obtain all cell score values in a trait
-            trait_adata: AnnData = init_data[:, i]
-            trait_value: collection = to_dense(trait_adata.X, is_array=True).flatten()
+            _seed_cell_size = self._get_seed_cell_size_(_gt0_cell_size)
-            # Obtain the maximum initial score
-            trait_value_max = np.max(trait_value)
-            trait_value_min = np.min(trait_value)
+            # 设置种子细胞索引与权重
+            _seed_cell_index = trait_value_sort_index[:_seed_cell_size]
+            _seed_cell_weight = np.zeros(n_cells)
+            _seed_cell_weight[_seed_cell_index] = self._get_seed_cell_weight_(
+                seed_cell_index=_seed_cell_index, value=trait_value
+            )
-            if trait_value_min != trait_value_max:
+            # 富集区间索引
+            _enrichment_start = _seed_cell_size
+            _enrichment_end = min(2 * _seed_cell_size, self.cell_size - 1)
-                # Obtain a cell count greater than zero
-                trait_value_sort_index = np.argsort(trait_value).astype(int)
-                trait_value_sort_index = trait_value_sort_index[::-1]
-                _gto_cell_index_ = trait_value > 0
-                _gt0_cell_size_ = trait_value[_gto_cell_index_].size
+            if _gt0_cell_size == _seed_cell_size:
+                _enrichment_start = max(_seed_cell_size - self._enrichment_seed_cell_min_count_, 0)
+                _enrichment_end = _seed_cell_size
-                _seed_cell_size_ = self._get_seed_cell_size_(_gt0_cell_size_)
+            _seed_cell_en_index = trait_value_sort_index[_enrichment_start:_enrichment_end]
+            _seed_cell_en_weight = np.zeros(n_cells)
+            _tmp_weight = self._get_seed_cell_weight_(
+                seed_cell_index=_seed_cell_index if len(_seed_cell_en_index) == len(_seed_cell_index) else _seed_cell_en_index,
+                value=trait_value,
+                seed_cell_index_enrichment=_seed_cell_en_index
+            )
+            _seed_cell_en_weight[_seed_cell_en_index] = _tmp_weight
+            # 无权重版本（仅在需要时计算）
+            _seed_cell_matrix = None
+            _seed_cell_matrix_en = None
+            if not self.is_simple and self.is_ablation:
+                seed_cell_value = np.zeros(n_cells)
+                seed_cell_value[_seed_cell_index] = 1
+                _seed_cell_matrix = seed_cell_value / seed_cell_value.sum() if seed_cell_value.sum() else 0
+                seed_cell_en_value = np.zeros(n_cells)
+                seed_cell_en_value[_seed_cell_en_index] = 1
+                _seed_cell_matrix_en = seed_cell_en_value / seed_cell_en_value.sum() if seed_cell_en_value.sum() else 0
+            return dict(
+                seed_cell_count=_seed_cell_size,
+                seed_cell_threshold=trait_value[trait_value_sort_index[_seed_cell_size]],
+                seed_cell_index=_seed_cell_index,
+                seed_cell_weight=_seed_cell_weight,
+                seed_cell_en_index=_seed_cell_en_index,
+                seed_cell_en_weight=_seed_cell_en_weight,
+                seed_cell_matrix=_seed_cell_matrix,
+                seed_cell_matrix_en=_seed_cell_matrix_en
+            )
-                seed_cell_count[i] = _seed_cell_size_
-                seed_cell_threshold[i] = trait_value[trait_value_sort_index[_seed_cell_size_]]
+        # 并行处理所有 trait
+        results = Parallel(n_jobs=-1, backend="threading")(delayed(_process_single_trait)(i) for i in self.trait_range)
-                # Set seed cell weights (reduce noise seed cell weights)
-                _seed_cell_index_ = trait_value_sort_index[0:_seed_cell_size_]
-                seed_cell_index[:, i][_seed_cell_index_] = 1
-                seed_cell_weight[:, i][_seed_cell_index_] = self._get_seed_cell_weight_(seed_cell_index=_seed_cell_index_, value=trait_value)
+        # 将并行结果写回对应数组
+        for i, res in enumerate(results):
-                _enrichment_start_index_: int = _seed_cell_size_
-                _enrichment_end_index_: int = 2 * _seed_cell_size_ if self.cell_size > 2 * _seed_cell_size_ else _seed_cell_size_ - 1
+            if res["seed_cell_index"] is None:
+                continue
-                if _gt0_cell_size_ == _seed_cell_size_:
-                    _enrichment_start_index_ = int(_seed_cell_size_ - self._enrichment_seed_cell_min_count_) if _seed_cell_size_ > self._enrichment_seed_cell_min_count_ else (
-                        (_seed_cell_size_ - 1) if _seed_cell_size_ > 2 else 0)
-                    _enrichment_end_index_ = _seed_cell_size_
+            seed_cell_count[i] = res["seed_cell_count"]
+            seed_cell_threshold[i] = res["seed_cell_threshold"]
+            seed_cell_index[res["seed_cell_index"], i] = 1
+            seed_cell_weight[:, i] = res["seed_cell_weight"]
+            seed_cell_weight_en[res["seed_cell_en_index"], i] = res["seed_cell_en_weight"]
-                _seed_cell_en_index_ = trait_value_sort_index[_enrichment_start_index_:_enrichment_end_index_]
-                _seed_cell_en_weight_ = self._get_seed_cell_weight_(
-                    seed_cell_index=_seed_cell_index_ if len(_seed_cell_en_index_) == len(_seed_cell_index_) else _seed_cell_en_index_, value=trait_value,
-                    seed_cell_index_enrichment=_seed_cell_en_index_
-                )
-                seed_cell_weight_en[:, i][_seed_cell_en_index_] = _seed_cell_en_weight_
-                if not self.is_simple and self.is_ablation:
-                    # Without weight
-                    seed_cell_value = np.zeros(self.cell_size)
-                    seed_cell_value[_seed_cell_index_] = 1
-                    seed_cell_matrix[:, i] = seed_cell_value / (1 if seed_cell_value.sum() == 0 else seed_cell_value.sum())
-                    seed_cell_en_value = np.zeros(self.cell_size)
-                    seed_cell_en_value[_seed_cell_en_index_] = 1
-                    seed_cell_matrix_en[:, i] = seed_cell_en_value / (1 if seed_cell_en_value.sum() == 0 else seed_cell_en_value.sum())
+            if not self.is_simple and self.is_ablation:
+                seed_cell_matrix[:, i] = res["seed_cell_matrix"]
+                seed_cell_matrix_en[:, i] = res["seed_cell_matrix_en"]
         return seed_cell_count, seed_cell_threshold, seed_cell_matrix, seed_cell_weight, seed_cell_index, seed_cell_matrix_en, seed_cell_weight_en

{sciv-0.0.82.dist-info → sciv-0.0.84.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sciv
-Version: 0.0.82
+Version: 0.0.84
 Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
 Project-URL: github, https://github.com/YuZhengM/sciv
 Author-email: Zheng-Min Yu <yuzmbio@163.com>

{sciv-0.0.82.dist-info → sciv-0.0.84.dist-info}/RECORD RENAMED Viewed

@@ -27,13 +27,13 @@ sciv/preprocessing/_scanpy_.py,sha256=mmkk4cMCzJCziF49RnOuXBiF4frS6aSiwZdUmfDAg4
 sciv/preprocessing/_scvi_.py,sha256=ZIDkQ_4deYmzSMiAbu5C3j_jMMl7hBTFLCBXHCNj3B4,10332
 sciv/preprocessing/_snapatac_.py,sha256=Dq8CHF7Psl3CQszaEokQYO56Oe2uzyWOy_cGlaOywfc,27798
 sciv/tool/__init__.py,sha256=WXzHkWt6RgBC3qqD-98nR5wQmt6oC850ox_VpMrapSU,2468
-sciv/tool/_algorithm_.py,sha256=6xLGB1-FRfRiHSCVb_tHvzY_N-RoMZ79p0O2fEio688,48030
+sciv/tool/_algorithm_.py,sha256=okGpH2OrBTO59LkyznT4gRi5S45oAcnO10Kxo5Xzy4I,47991
 sciv/tool/_matrix_.py,sha256=O1EAhA9wxh06P_eOxEBesK7kO7IExKlhH6uJzGh1HBM,24322
-sciv/tool/_random_walk_.py,sha256=98HLa9X2xx3Tj7VKKwQ2oS-CWL7HbOURAXiYKky2OYs,47338
+sciv/tool/_random_walk_.py,sha256=XE7LlTFn5vHVth47cLtIJC6D4vwOsNM5sgrFwx-KL3g,48671
 sciv/util/__init__.py,sha256=nOxZ8if27X7AUJ6hZwTwxOJwIBJb0obWlHjqCzjg_Gc,1964
 sciv/util/_constant_.py,sha256=w0wKQd8guLd1ZTW24_5aECrWsIWDiNQmEpLsWlHar1A,3000
 sciv/util/_core_.py,sha256=ZD2uSnEBHVu0i9TmXWzri_3bXZzYKnIZk818gW3zadE,14751
-sciv-0.0.82.dist-info/METADATA,sha256=JTluLyVcmc6vYfTh76ejiHiT0fnqSEVJa9XngVEGj2U,3465
-sciv-0.0.82.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sciv-0.0.82.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
-sciv-0.0.82.dist-info/RECORD,,
+sciv-0.0.84.dist-info/METADATA,sha256=XIG0HyGsbObIpMSR8_t0W9IZIAUy-shlHS07ziGAAGI,3465
+sciv-0.0.84.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sciv-0.0.84.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
+sciv-0.0.84.dist-info/RECORD,,

{sciv-0.0.82.dist-info → sciv-0.0.84.dist-info}/WHEEL RENAMED Viewed

File without changes

{sciv-0.0.82.dist-info → sciv-0.0.84.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sciv 0.0.82__py3-none-any.whl → 0.0.84__py3-none-any.whl

sciv 0.0.82__py3-none-any.whl → 0.0.84__py3-none-any.whl