PyPI - sciv - Versions diffs - 0.0.96__py3-none-any.whl → 0.0.97__py3-none-any.whl - Mend

sciv 0.0.96__py3-none-any.whl → 0.0.97__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

sciv/model/_core_.py +2 -2
sciv/tool/_algorithm_.py +47 -18
sciv/util/_core_.py +6 -4
{sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/METADATA +1 -1
{sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/RECORD +7 -7
{sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/WHEEL +0 -0
{sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/licenses/LICENSE +0 -0

sciv/model/_core_.py CHANGED Viewed

@@ -81,7 +81,7 @@ def core(
     weight: float = 0.1,
     kernel: Literal["laplacian", "gaussian"] = "gaussian",
     local_k: int = 10,
-    kernel_gamma: Optional[float, collection] = None,
+    kernel_gamma: Optional[Union[float, collection]] = None,
     epsilon: float = 1e-05,
     gamma: float = 0.05,
     enrichment_gamma: float = 0.05,
@@ -481,7 +481,7 @@ def core(
             )
     else:
-        overlap_adata: AnnData = overlap_sum(adata, variants, trait_info)
+        overlap_adata: AnnData = overlap_sum(adata, variants, trait_info, n_jobs=n_jobs)
     del variants, trait_info

sciv/tool/_algorithm_.py CHANGED Viewed

@@ -7,6 +7,8 @@ from typing import Union, Tuple, Literal, Optional
 from scipy import sparse
 from scipy.stats import norm
 from tqdm import tqdm
+from joblib import Parallel, delayed
+import multiprocessing
 import numpy as np
 from anndata import AnnData
@@ -516,7 +518,7 @@ def semi_mutual_knn_weight(
     adj_weight = (1 - weight) * adj_and.astype(np.float32) + weight * adj_or.astype(np.float32)
     # Ensure full connectivity if required
-    if is_mknn_fully_connected and (or_neighbors == 0 or weight == 0):
+    if is_mknn_fully_connected:
         adj_1nn = _knn(new_data, 1)
         if sparse.issparse(adj_and):
@@ -824,16 +826,17 @@ def _overlap_(regions_sort: DataFrame, variants: DataFrame) -> DataFrame:
         if chr_a in chr_keys:
             # get chr variant
             variants_chr_type_position_list = variants_position_list[chr_a]
             # judge start and end position
             if start <= variants_chr_type_position_list[-1] and end >= variants_chr_type_position_list[0]:
                 # get index
-                start_index = get_index(start, variants_chr_type_position_list)
-                end_index = get_index(end, variants_chr_type_position_list)
+                start_index = get_index(start, variants_chr_type_position_list, False)
+                end_index = get_index(end, variants_chr_type_position_list, False)
                 # Determine whether it is equal, Equality means there is no overlap
                 if start_index != end_index:
-                    start_index = start_index if isinstance(start_index, number) else start_index[1]
-                    end_index = end_index + 1 if isinstance(end_index, number) else end_index[1]
+                    start_index = start_index if isinstance(start_index, int) else start_index[1]
+                    end_index = end_index + 1 if isinstance(end_index, int) else end_index[1]
                     if start_index > end_index:
                         ul.log(__name__).error("The end index in the region is greater than the start index.")
@@ -894,13 +897,14 @@ def overlap(regions: DataFrame, variants: DataFrame) -> DataFrame:
     return _overlap_(regions_sort, variants)
-def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnData:
+def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame, n_jobs: int = -1) -> AnnData:
     """
     Overlap regional data and mutation data and sum the PP values of all mutations in a region as the values for that
     region.
     :param regions: peaks data
     :param variants: variants data
     :param trait_info: traits information
+    :param n_jobs: The maximum number of concurrently running jobs
     :return: overlap data
     """
@@ -916,8 +920,6 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
     trait_names = trait_info["id"].tolist()
     n_trait = len(trait_names)
-    # Pre-allocate sparse matrix, fill column by column, then convert to csc and then csr for efficiency
-    row_indices, col_indices, data_vals = [], [], []
     # Check column existence once
     required = {"chr", "start", "end"}
@@ -941,13 +943,18 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
     ul.log(__name__).info("Obtain peak-trait/disease matrix. (overlap variant information)")
-    # The outer loop can be further accelerated by parallelizing over traits; here we keep it single-threaded for now.
-    for col_idx, trait_name in enumerate(tqdm(trait_names)):
+    # Function to process a single trait
+    def _process_trait_(trait_name, col_idx):
+        local_data_vals = []
+        local_row_indices = []
+        local_col_indices = []
         variant: AnnData = variants[trait_name]
         overlap_df: DataFrame = _overlap_(regions_df, variant.obs)
         if overlap_df.empty:
-            continue
+            return local_data_vals, local_row_indices, local_col_indices
         # Sum at once: first group by label and collect variant_id into a list
         label_var_ids = (
@@ -972,15 +979,37 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
             if matrix_sum.size == 1:
                 val = float(matrix_sum)
                 if val != 0:
-                    row_indices.append(row_idx)
-                    col_indices.append(col_idx)
-                    data_vals.append(val)
+                    local_row_indices.append(row_idx)
+                    local_col_indices.append(col_idx)
+                    local_data_vals.append(val)
             else:
                 for t_idx, v in enumerate(matrix_sum):
                     if v != 0:
-                        row_indices.append(row_idx)
-                        col_indices.append(col_idx + t_idx)
-                        data_vals.append(float(v))
+                        local_row_indices.append(row_idx)
+                        local_col_indices.append(col_idx + t_idx)
+                        local_data_vals.append(float(v))
+        return local_data_vals, local_row_indices, local_col_indices
+    # Use Parallel to process traits in parallel
+    results = Parallel(n_jobs=n_jobs)(
+        delayed(_process_trait_)(trait_name, col_idx) for col_idx, trait_name in enumerate(trait_names)
+    )
+    # Preallocate length to avoid list dynamic expansion
+    total = sum(len(ld) for ld, _, _ in results)
+    row_indices = np.empty(total, dtype=np.int32)
+    col_indices = np.empty(total, dtype=np.int32)
+    data_vals  = np.empty(total, dtype=np.float32)
+    ptr = 0
+    for local_data, local_rows, local_cols in results:
+        n = len(local_data)
+        row_indices[ptr:ptr+n] = local_rows
+        col_indices[ptr:ptr+n] = local_cols
+        data_vals[ptr:ptr+n] = local_data
+        ptr += n
     # Build sparse matrix, then convert to csr format
     overlap_sparse = sparse.csc_matrix(
@@ -1253,7 +1282,7 @@ def obtain_cell_cell_network(
     weight: float = 0.1,
     kernel: Literal["laplacian", "gaussian"] = "gaussian",
     local_k: int = 10,
-    gamma: Optional[float, collection] = None,
+    gamma: Optional[Union[float, collection]] = None,
     is_simple: bool = True
 ) -> AnnData:
     """

sciv/util/_core_.py CHANGED Viewed

@@ -90,17 +90,20 @@ def sum_min_max(data: matrix_data, axis: int = 1) -> Tuple[number, number]:
     return min(rows_sum), max(rows_sum)
-def get_index(position: number, positions_list: list) -> Union[number, Tuple[number, number]]:
+def get_index(position: number, positions_list: list, is_sort: bool = True) -> Union[int, Tuple[int, int]]:
     """
     Search for position information. Similar to half search.
         If the position exists in the list, return the index.
         If it does not exist, return the index located between the two indexes
     :param position: position
     :param positions_list: position list
+    :param is_sort: True
     :return: position index
     """
-    # sort
-    positions_list.sort()
+    if is_sort:
+        positions_list.sort()
     # search
     position_size: int = len(positions_list)
     left, right = 0, position_size - 1
@@ -441,7 +444,6 @@ def plot_end(
     close: bool = False,
     dpi: float = 300
 ):
     if title is not None:
         plt.title(title)

{sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sciv
-Version: 0.0.96
+Version: 0.0.97
 Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
 Project-URL: github, https://github.com/YuZhengM/sciv
 Author-email: Zheng-Min Yu <yuzmbio@163.com>

{sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ sciv/file/__init__.py,sha256=8cYLG0S0nilblmyX46CWFrbLr-rmLbO1EEO477pZ-gk,520
 sciv/file/_read_.py,sha256=UZJpN3_5hBiTjzEYO6YXORcE_dqA8HmLpV80nqTLNSo,30554
 sciv/file/_write_.py,sha256=W3M9CmPi7BuKAffz1fdi-vA5DzAFZ7wmcggp33N9Xtg,7848
 sciv/model/__init__.py,sha256=k8SO9FpJaGn2ANqJyaz3HXMas7jH9toPVtpw703kOqg,149
-sciv/model/_core_.py,sha256=3GjKG5w-cTBF53LpSrFrMFnqwtgMI2_BkSlLGFMMMj8,33255
+sciv/model/_core_.py,sha256=rKIEhVz67HM7hXkEUpUR9l7uyIK72jK1zANPVpB_Hcc,33277
 sciv/plot/__init__.py,sha256=2tRNT6TZNz9r38lnna712RGsH7OJ2QkGa37XKgzejHQ,1865
 sciv/plot/_bar_.py,sha256=xWpFbJTHgQMLuUSXa4uE69RGHXSCGinapxL-1imdDcU,14355
 sciv/plot/_barcode_.py,sha256=RDOedQ8ZtXWFyJ2c772RDfqO4TMIpHMvcMZMAVqky90,5073
@@ -27,13 +27,13 @@ sciv/preprocessing/_scanpy_.py,sha256=tB8BD2wpLAU8_YxdqrgNtcjpNXNRo-JCdm2lxaKDBL
 sciv/preprocessing/_scvi_.py,sha256=7QxwPA2kR_g15X28aEak7AFA4kyQ-UbtpiLH-rc5Ksg,10780
 sciv/preprocessing/_snapatac_.py,sha256=Dq8CHF7Psl3CQszaEokQYO56Oe2uzyWOy_cGlaOywfc,27798
 sciv/tool/__init__.py,sha256=WXzHkWt6RgBC3qqD-98nR5wQmt6oC850ox_VpMrapSU,2468
-sciv/tool/_algorithm_.py,sha256=uzA__knaIgIMcnFK-JwKT4sIbCETh94eYH-2WB_BpXc,52728
+sciv/tool/_algorithm_.py,sha256=BzUUBn22R12PoQHSjhcKZfj6Yw01Qh_VIgl8RXgWvfY,53580
 sciv/tool/_matrix_.py,sha256=SnC3sXic_ufuEXStcD_HncvYH6apBdNK6nhG6jFLmjA,24324
 sciv/tool/_random_walk_.py,sha256=JOB97XLxlZYHvlIST1wlXgA0mw6fybkWnJGq6X_kbsk,48871
 sciv/util/__init__.py,sha256=nOxZ8if27X7AUJ6hZwTwxOJwIBJb0obWlHjqCzjg_Gc,1964
 sciv/util/_constant_.py,sha256=w0wKQd8guLd1ZTW24_5aECrWsIWDiNQmEpLsWlHar1A,3000
-sciv/util/_core_.py,sha256=hF33ybPcoVlapZsm-2Etem-p_rQUqXlsdaQgZv5jD7w,14867
-sciv-0.0.96.dist-info/METADATA,sha256=xLbV5NRQL7Q3aA25Peb8Idk1PjZ7rM6yXlJsH7VM_OI,3465
-sciv-0.0.96.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sciv-0.0.96.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
-sciv-0.0.96.dist-info/RECORD,,
+sciv/util/_core_.py,sha256=TUWfBNRJzWuoQ9ffew_DjnlkNydG-Rmujl_RH4Ln9io,14917
+sciv-0.0.97.dist-info/METADATA,sha256=lCXJ0ySEYDpLmHDMuR7FXcg5bEKA_THBDG1aCqH7Siw,3465
+sciv-0.0.97.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sciv-0.0.97.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
+sciv-0.0.97.dist-info/RECORD,,

{sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/WHEEL RENAMED Viewed

File without changes

{sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sciv 0.0.96__py3-none-any.whl → 0.0.97__py3-none-any.whl

sciv 0.0.96__py3-none-any.whl → 0.0.97__py3-none-any.whl