PyPI - sciv - Versions diffs - 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl - Mend

sciv 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

sciv/model/_core_.py +25 -13
sciv/tool/_algorithm_.py +55 -18
sciv/util/_core_.py +6 -4
{sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/METADATA +1 -1
{sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/RECORD +7 -7
{sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/WHEEL +0 -0
{sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/licenses/LICENSE +0 -0

sciv/model/_core_.py CHANGED Viewed

@@ -15,7 +15,7 @@ from ..tool import RandomWalk, overlap_sum, obtain_cell_cell_network, calculate_
 from ..file import save_h5ad, save_pkl, read_h5ad, read_pkl
 from ..preprocessing import filter_data, poisson_vi
-from ..util import path, enrichment_optional, to_dense, collection, to_sparse, difference_peak_optional
+from ..util import path, enrichment_optional, to_dense, collection, to_sparse, difference_peak_optional, project_name
 __name__: str = "model_core"
@@ -81,7 +81,7 @@ def core(
     weight: float = 0.1,
     kernel: Literal["laplacian", "gaussian"] = "gaussian",
     local_k: int = 10,
-    kernel_gamma: Optional[float, collection] = None,
+    kernel_gamma: Optional[Union[float, collection]] = None,
     epsilon: float = 1e-05,
     gamma: float = 0.05,
     enrichment_gamma: float = 0.05,
@@ -453,7 +453,7 @@ def core(
             model_dir=model_dir
         )
-    step1_time = adata.uns["elapsed_time"] + da_peaks.uns["elapsed_time"]
+    poisson_vi_time = adata.uns["elapsed_time"] + da_peaks.uns["elapsed_time"]
     if save_path is not None:
@@ -481,11 +481,11 @@ def core(
             )
     else:
-        overlap_adata: AnnData = overlap_sum(adata, variants, trait_info)
+        overlap_adata: AnnData = overlap_sum(adata, variants, trait_info, n_jobs=n_jobs)
     del variants, trait_info
-    step2_time = overlap_adata.uns["elapsed_time"]
+    overlap_time = overlap_adata.uns["elapsed_time"]
     if save_path is not None and not overlap_is_read:
         save_h5ad(overlap_adata, file=atac_overlap_save_file)
@@ -518,7 +518,7 @@ def core(
     del da_peaks, overlap_adata
-    step3_time = init_score.uns["elapsed_time"]
+    init_score_time = init_score.uns["elapsed_time"]
     if save_path is not None and not init_score_is_read:
         save_h5ad(init_score, file=init_score_save_file)
@@ -546,13 +546,13 @@ def core(
     del adata
-    step4_time = cc_data.uns["elapsed_time"]
+    smknn_time = cc_data.uns["elapsed_time"]
     if save_path is not None and not cc_data_is_read:
         save_h5ad(cc_data, file=cc_data_save_file)
     """
-    5. Random walk
+    5. Random walk with weighted seed cells
     """
     random_walk_is_read: bool = is_file_exist_loading and os.path.exists(random_walk_save_file) and is_save_random_walk_model
@@ -584,17 +584,29 @@ def core(
     del random_walk_is_read, init_score, cc_data
     trs = _run_random_walk_(random_walk, is_ablation, is_simple)
+    trs.uns["params"] = params
+    del params
-    step5_time = random_walk.elapsed_time
+    random_walk_time = random_walk.elapsed_time
     # end time
     elapsed_time = time.time() - start_time
-    step_time = step1_time + step2_time + step3_time + step4_time + step5_time
+    step_time = poisson_vi_time + overlap_time + init_score_time + smknn_time + random_walk_time
-    params.update({"elapsed_time": elapsed_time if elapsed_time > step_time else step_time})
-    trs.uns["params"] = params
+    if elapsed_time < step_time:
+        elapsed_time = step_time
-    del params
+    ul.log(__name__).info(f"Algorithm {project_name} consumes a total of {elapsed_time} seconds.")
+    trs.uns["elapsed_time"] = {
+        "PoissonVI": poisson_vi_time,
+        "Overlap": overlap_time,
+        "initial TRS": init_score_time,
+        "SM-kNN": smknn_time,
+        "Random walk": random_walk_time,
+        "Total time": elapsed_time
+    }
     if save_path is not None:
         save_h5ad(trs, file=trs_save_file)

sciv/tool/_algorithm_.py CHANGED Viewed

@@ -7,6 +7,8 @@ from typing import Union, Tuple, Literal, Optional
 from scipy import sparse
 from scipy.stats import norm
 from tqdm import tqdm
+from joblib import Parallel, delayed
+import multiprocessing
 import numpy as np
 from anndata import AnnData
@@ -516,7 +518,7 @@ def semi_mutual_knn_weight(
     adj_weight = (1 - weight) * adj_and.astype(np.float32) + weight * adj_or.astype(np.float32)
     # Ensure full connectivity if required
-    if is_mknn_fully_connected and (or_neighbors == 0 or weight == 0):
+    if is_mknn_fully_connected:
         adj_1nn = _knn(new_data, 1)
         if sparse.issparse(adj_and):
@@ -824,16 +826,17 @@ def _overlap_(regions_sort: DataFrame, variants: DataFrame) -> DataFrame:
         if chr_a in chr_keys:
             # get chr variant
             variants_chr_type_position_list = variants_position_list[chr_a]
             # judge start and end position
             if start <= variants_chr_type_position_list[-1] and end >= variants_chr_type_position_list[0]:
                 # get index
-                start_index = get_index(start, variants_chr_type_position_list)
-                end_index = get_index(end, variants_chr_type_position_list)
+                start_index = get_index(start, variants_chr_type_position_list, False)
+                end_index = get_index(end, variants_chr_type_position_list, False)
                 # Determine whether it is equal, Equality means there is no overlap
                 if start_index != end_index:
-                    start_index = start_index if isinstance(start_index, number) else start_index[1]
-                    end_index = end_index + 1 if isinstance(end_index, number) else end_index[1]
+                    start_index = start_index if isinstance(start_index, int) else start_index[1]
+                    end_index = end_index + 1 if isinstance(end_index, int) else end_index[1]
                     if start_index > end_index:
                         ul.log(__name__).error("The end index in the region is greater than the start index.")
@@ -894,13 +897,14 @@ def overlap(regions: DataFrame, variants: DataFrame) -> DataFrame:
     return _overlap_(regions_sort, variants)
-def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnData:
+def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame, n_jobs: int = -1) -> AnnData:
     """
     Overlap regional data and mutation data and sum the PP values of all mutations in a region as the values for that
     region.
     :param regions: peaks data
     :param variants: variants data
     :param trait_info: traits information
+    :param n_jobs: The maximum number of concurrently running jobs
     :return: overlap data
     """
@@ -916,8 +920,6 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
     trait_names = trait_info["id"].tolist()
     n_trait = len(trait_names)
-    # Pre-allocate sparse matrix, fill column by column, then convert to csc and then csr for efficiency
-    row_indices, col_indices, data_vals = [], [], []
     # Check column existence once
     required = {"chr", "start", "end"}
@@ -941,13 +943,18 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
     ul.log(__name__).info("Obtain peak-trait/disease matrix. (overlap variant information)")
-    # The outer loop can be further accelerated by parallelizing over traits; here we keep it single-threaded for now.
-    for col_idx, trait_name in enumerate(tqdm(trait_names)):
+    # Function to process a single trait
+    def _process_trait_(trait_name, col_idx):
+        local_data_vals = []
+        local_row_indices = []
+        local_col_indices = []
         variant: AnnData = variants[trait_name]
         overlap_df: DataFrame = _overlap_(regions_df, variant.obs)
         if overlap_df.empty:
-            continue
+            return local_data_vals, local_row_indices, local_col_indices
         # Sum at once: first group by label and collect variant_id into a list
         label_var_ids = (
@@ -972,15 +979,37 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
             if matrix_sum.size == 1:
                 val = float(matrix_sum)
                 if val != 0:
-                    row_indices.append(row_idx)
-                    col_indices.append(col_idx)
-                    data_vals.append(val)
+                    local_row_indices.append(row_idx)
+                    local_col_indices.append(col_idx)
+                    local_data_vals.append(val)
             else:
                 for t_idx, v in enumerate(matrix_sum):
                     if v != 0:
-                        row_indices.append(row_idx)
-                        col_indices.append(col_idx + t_idx)
-                        data_vals.append(float(v))
+                        local_row_indices.append(row_idx)
+                        local_col_indices.append(col_idx + t_idx)
+                        local_data_vals.append(float(v))
+        return local_data_vals, local_row_indices, local_col_indices
+    # Use Parallel to process traits in parallel
+    results = Parallel(n_jobs=n_jobs)(
+        delayed(_process_trait_)(trait_name, col_idx) for col_idx, trait_name in tqdm(enumerate(trait_names))
+    )
+    # Preallocate length to avoid list dynamic expansion
+    total = sum(len(ld) for ld, _, _ in results)
+    row_indices = np.empty(total, dtype=np.int32)
+    col_indices = np.empty(total, dtype=np.int32)
+    data_vals  = np.empty(total, dtype=np.float32)
+    ptr = 0
+    for local_data, local_rows, local_cols in results:
+        n = len(local_data)
+        row_indices[ptr:ptr+n] = local_rows
+        col_indices[ptr:ptr+n] = local_cols
+        data_vals[ptr:ptr+n] = local_data
+        ptr += n
     # Build sparse matrix, then convert to csr format
     overlap_sparse = sparse.csc_matrix(
@@ -1205,10 +1234,18 @@ def calculate_init_score_weight(
     ul.log(__name__).info("Calculate initial trait relevance scores")
     _init_trs_weight_ = np.multiply(_init_trs_ncw_, _cell_type_weight_)
+    if hasattr(_init_trs_weight_, "A"):
+        _init_trs_weight_ = _init_trs_weight_.A
     init_trs_adata = AnnData(_init_trs_weight_, obs=cell_anno, var=trait_anno)
     del _init_trs_weight_
     if not is_simple:
+        if hasattr(_init_trs_ncw_, "A"):
+            _init_trs_ncw_ = _init_trs_ncw_.A
         init_trs_adata.layers["init_trs_ncw"] = _init_trs_ncw_
         init_trs_adata.layers["cell_type_weight"] = to_sparse(_cell_type_weight_)
         init_trs_adata.uns["cluster_weight_factor"] = da_peaks_adata.obsm["cluster_weight"]
@@ -1253,7 +1290,7 @@ def obtain_cell_cell_network(
     weight: float = 0.1,
     kernel: Literal["laplacian", "gaussian"] = "gaussian",
     local_k: int = 10,
-    gamma: Optional[float, collection] = None,
+    gamma: Optional[Union[float, collection]] = None,
     is_simple: bool = True
 ) -> AnnData:
     """

sciv/util/_core_.py CHANGED Viewed

@@ -90,17 +90,20 @@ def sum_min_max(data: matrix_data, axis: int = 1) -> Tuple[number, number]:
     return min(rows_sum), max(rows_sum)
-def get_index(position: number, positions_list: list) -> Union[number, Tuple[number, number]]:
+def get_index(position: number, positions_list: list, is_sort: bool = True) -> Union[int, Tuple[int, int]]:
     """
     Search for position information. Similar to half search.
         If the position exists in the list, return the index.
         If it does not exist, return the index located between the two indexes
     :param position: position
     :param positions_list: position list
+    :param is_sort: True
     :return: position index
     """
-    # sort
-    positions_list.sort()
+    if is_sort:
+        positions_list.sort()
     # search
     position_size: int = len(positions_list)
     left, right = 0, position_size - 1
@@ -441,7 +444,6 @@ def plot_end(
     close: bool = False,
     dpi: float = 300
 ):
     if title is not None:
         plt.title(title)

{sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sciv
-Version: 0.0.96
+Version: 0.0.98
 Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
 Project-URL: github, https://github.com/YuZhengM/sciv
 Author-email: Zheng-Min Yu <yuzmbio@163.com>

{sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/RECORD RENAMED Viewed

@@ -3,7 +3,7 @@ sciv/file/__init__.py,sha256=8cYLG0S0nilblmyX46CWFrbLr-rmLbO1EEO477pZ-gk,520
 sciv/file/_read_.py,sha256=UZJpN3_5hBiTjzEYO6YXORcE_dqA8HmLpV80nqTLNSo,30554
 sciv/file/_write_.py,sha256=W3M9CmPi7BuKAffz1fdi-vA5DzAFZ7wmcggp33N9Xtg,7848
 sciv/model/__init__.py,sha256=k8SO9FpJaGn2ANqJyaz3HXMas7jH9toPVtpw703kOqg,149
-sciv/model/_core_.py,sha256=3GjKG5w-cTBF53LpSrFrMFnqwtgMI2_BkSlLGFMMMj8,33255
+sciv/model/_core_.py,sha256=HCXPFOxfed5TUJMI-pZV9cx9_2R8z4dNPb2f7Hp3mVc,33693
 sciv/plot/__init__.py,sha256=2tRNT6TZNz9r38lnna712RGsH7OJ2QkGa37XKgzejHQ,1865
 sciv/plot/_bar_.py,sha256=xWpFbJTHgQMLuUSXa4uE69RGHXSCGinapxL-1imdDcU,14355
 sciv/plot/_barcode_.py,sha256=RDOedQ8ZtXWFyJ2c772RDfqO4TMIpHMvcMZMAVqky90,5073
@@ -27,13 +27,13 @@ sciv/preprocessing/_scanpy_.py,sha256=tB8BD2wpLAU8_YxdqrgNtcjpNXNRo-JCdm2lxaKDBL
 sciv/preprocessing/_scvi_.py,sha256=7QxwPA2kR_g15X28aEak7AFA4kyQ-UbtpiLH-rc5Ksg,10780
 sciv/preprocessing/_snapatac_.py,sha256=Dq8CHF7Psl3CQszaEokQYO56Oe2uzyWOy_cGlaOywfc,27798
 sciv/tool/__init__.py,sha256=WXzHkWt6RgBC3qqD-98nR5wQmt6oC850ox_VpMrapSU,2468
-sciv/tool/_algorithm_.py,sha256=uzA__knaIgIMcnFK-JwKT4sIbCETh94eYH-2WB_BpXc,52728
+sciv/tool/_algorithm_.py,sha256=mYKfSuYGelLd2secwyqGPxBQYd3x2yDKw1z7HK8mqYE,53773
 sciv/tool/_matrix_.py,sha256=SnC3sXic_ufuEXStcD_HncvYH6apBdNK6nhG6jFLmjA,24324
 sciv/tool/_random_walk_.py,sha256=JOB97XLxlZYHvlIST1wlXgA0mw6fybkWnJGq6X_kbsk,48871
 sciv/util/__init__.py,sha256=nOxZ8if27X7AUJ6hZwTwxOJwIBJb0obWlHjqCzjg_Gc,1964
 sciv/util/_constant_.py,sha256=w0wKQd8guLd1ZTW24_5aECrWsIWDiNQmEpLsWlHar1A,3000
-sciv/util/_core_.py,sha256=hF33ybPcoVlapZsm-2Etem-p_rQUqXlsdaQgZv5jD7w,14867
-sciv-0.0.96.dist-info/METADATA,sha256=xLbV5NRQL7Q3aA25Peb8Idk1PjZ7rM6yXlJsH7VM_OI,3465
-sciv-0.0.96.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-sciv-0.0.96.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
-sciv-0.0.96.dist-info/RECORD,,
+sciv/util/_core_.py,sha256=TUWfBNRJzWuoQ9ffew_DjnlkNydG-Rmujl_RH4Ln9io,14917
+sciv-0.0.98.dist-info/METADATA,sha256=yZiAx2el-OqapNmZNjUpjAmTAmQbIcBIZa7QFXjYPYc,3465
+sciv-0.0.98.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+sciv-0.0.98.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
+sciv-0.0.98.dist-info/RECORD,,

{sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/WHEEL RENAMED Viewed

File without changes

{sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

sciv 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl

sciv 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl