sciv 0.0.89.tar.gz → 0.0.91.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {sciv-0.0.89 → sciv-0.0.91}/PKG-INFO +1 -1
  2. {sciv-0.0.89 → sciv-0.0.91}/pyproject.toml +1 -1
  3. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_scvi_.py +10 -3
  4. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/_algorithm_.py +14 -20
  5. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/_random_walk_.py +3 -4
  6. {sciv-0.0.89 → sciv-0.0.91}/.gitignore +0 -0
  7. {sciv-0.0.89 → sciv-0.0.91}/LICENSE +0 -0
  8. {sciv-0.0.89 → sciv-0.0.91}/MANIFEST.in +0 -0
  9. {sciv-0.0.89 → sciv-0.0.91}/README.en.md +0 -0
  10. {sciv-0.0.89 → sciv-0.0.91}/README.md +0 -0
  11. {sciv-0.0.89 → sciv-0.0.91}/requirements.txt +0 -0
  12. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/__init__.py +0 -0
  13. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/file/__init__.py +0 -0
  14. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/file/_read_.py +0 -0
  15. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/file/_write_.py +0 -0
  16. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/model/__init__.py +0 -0
  17. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/model/_core_.py +0 -0
  18. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/__init__.py +0 -0
  19. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_bar_.py +0 -0
  20. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_barcode_.py +0 -0
  21. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_box_.py +0 -0
  22. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_bubble_.py +0 -0
  23. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_core_.py +0 -0
  24. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_graph_.py +0 -0
  25. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_heat_map_.py +0 -0
  26. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_kde_.py +0 -0
  27. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_line_.py +0 -0
  28. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_pie_.py +0 -0
  29. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_radar_.py +0 -0
  30. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_scatter_.py +0 -0
  31. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_venn_.py +0 -0
  32. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_violin_.py +0 -0
  33. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/__init__.py +0 -0
  34. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_anndata_.py +0 -0
  35. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_gencode_.py +0 -0
  36. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_gsea_.py +0 -0
  37. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_scanpy_.py +0 -0
  38. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_snapatac_.py +0 -0
  39. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/__init__.py +0 -0
  40. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/_matrix_.py +0 -0
  41. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/util/__init__.py +0 -0
  42. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/util/_constant_.py +0 -0
  43. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/util/_core_.py +0 -0
  44. {sciv-0.0.89 → sciv-0.0.91}/tests/scivTest/__init__.py +0 -0
{sciv-0.0.89 → sciv-0.0.91}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sciv
-Version: 0.0.89
+Version: 0.0.91
 Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
 Project-URL: github, https://github.com/YuZhengM/sciv
 Author-email: Zheng-Min Yu <yuzmbio@163.com>
{sciv-0.0.89 → sciv-0.0.91}/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 [project]
 name = "sciv"
-version = "0.0.89"
+version = "0.0.91"
 authors = [
     { name = "Zheng-Min Yu", email = "yuzmbio@163.com" },
 ]
{sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_scvi_.py
@@ -8,6 +8,7 @@ import numpy as np
 import pandas as pd

 from anndata import AnnData
+from torch import OutOfMemoryError

 from .. import util as ul
 from ..tool import umap, tsne
@@ -126,11 +127,17 @@ def poisson_vi(
     if model_dir is not None:
         if os.path.exists(os.path.join(model_dir, "model.pt")):
             ul.log(__name__).info(f"Due to the existence of file `model.pt`, it is loaded by default.")
+
             try:
                 model = scvi.external.POISSONVI.load(model_dir, adata=adata)
-            except FileExistsError as fee:
-                ul.log(__name__).error(f"File `model.pt` failed to load, you can execute `Poisson VI` again by deleting file `model.pt` ({model_dir}/model.pt).\n {fee}")
-                raise ValueError(f"File `model.pt` failed to load, you can execute `Poisson VI` again by deleting file `model.pt` ({model_dir}/model.pt).")
+            except OutOfMemoryError as ome:
+                ul.log(__name__).warning(f"GPU failed to run, try to switch to CPU running.\n {ome}")
+
+                try:
+                    model = scvi.external.POISSONVI.load(model_dir, adata=adata, accelerator="cpu", devices="cpu")
+                except Exception as e:
+                    ul.log(__name__).error(f"File `model.pt` failed to load, you can execute `Poisson VI` again by deleting file `model.pt` ({model_dir}/model.pt).\n {e}")
+                    raise ValueError(f"File `model.pt` failed to load, you can execute `Poisson VI` again by deleting file `model.pt` ({model_dir}/model.pt).")
         else:
             ul.file_method(__name__).makedirs(model_dir)
             model = __train__()
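For context, the fallback pattern the new code adopts can be sketched in isolation as below. This is a minimal illustration, not sciv code: the helper name is hypothetical, and the load calls simply mirror the ones in the hunk above (`from torch import OutOfMemoryError` requires a PyTorch release that exports this exception).

    import scvi
    from torch import OutOfMemoryError


    def load_poissonvi_with_cpu_fallback(model_dir, adata):
        # Hypothetical helper illustrating the GPU-to-CPU fallback shown above.
        try:
            # Default load; uses the GPU when one is available.
            return scvi.external.POISSONVI.load(model_dir, adata=adata)
        except OutOfMemoryError:
            # The GPU ran out of memory while loading; retry on the CPU,
            # mirroring the updated poisson_vi() branch.
            return scvi.external.POISSONVI.load(model_dir, adata=adata, accelerator="cpu", devices="cpu")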
{sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/_algorithm_.py
@@ -814,7 +814,9 @@ def _overlap_(regions_sort: DataFrame, variants: DataFrame) -> DataFrame:

     variants_overlap_info_list: list = []

-    for index, chr_a, start, end in zip(regions_sort["index"], regions_sort["chr"], regions_sort["start"],
+    for index, chr_a, start, end in zip(regions_sort["index"],
+                                        regions_sort["chr"],
+                                        regions_sort["start"],
                                         regions_sort["end"]):

         # judge chr
@@ -906,15 +908,15 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
     # Peak number
     label_all_size: int = len(label_all)

-    # Build the peaks index into a dict in advance for O(1) lookup
+    # Pre-build a dict of peak indices for O(1) lookup
     label2idx = {lb: i for i, lb in enumerate(label_all)}

     trait_names = trait_info["id"].tolist()
     n_trait = len(trait_names)
-    # Pre-allocate the sparse matrix, fill it column by column, and convert to csc then csr in one go at the end; this saves memory and is fast
+    # Pre-allocate sparse matrix, fill column by column, then convert to csc and then csr for efficiency
     row_indices, col_indices, data_vals = [], [], []

-    # Check column existence in a single pass
+    # Check column existence once
     required = {"chr", "start", "end"}

     if not required.issubset(regions.var.columns):
@@ -936,7 +938,7 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD

     ul.log(__name__).info("Obtain peak-trait/disease matrix. (overlap variant information)")

-    # The outer loop could be further accelerated by parallelizing over traits; keep a single loop for now
+    # The outer loop can be further accelerated by parallelizing over traits; here we keep it single-threaded for now.
     for col_idx, trait_name in enumerate(tqdm(trait_names)):
         variant: AnnData = variants[trait_name]
         overlap_df: DataFrame = _overlap_(regions_df, variant.obs)
@@ -944,34 +946,26 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
         if overlap_df.empty:
             continue

-        # Get the label -> variant_id lists directly, skipping groupby
-        overlap_df = overlap_df.rename(columns={"index": "label"})
-        # Map labels to row indices
-        overlap_df = overlap_df[overlap_df["label"].isin(label2idx)]
-
-        if overlap_df.empty:
-            continue
-
-        # Sum in one pass: first group by label and collect variant_id into lists
+        # Sum at once: first group by label and collect variant_id into a list
         label_var_ids = (
             overlap_df
-            .groupby("label")["variant_id"]
+            .groupby("index")["variant_id"]
             .apply(list)
             .reset_index()
         )

-        # Iterate over each label and sum with a single slice
+        # Traverse each label, sum once for each variant_id list
         for _, row in label_var_ids.iterrows():
-            label = row["label"]
+            label = row["index"]
             row_idx = label2idx[label]
             var_ids = row["variant_id"]
-            # Slice once and sum, avoiding row-by-row slicing
+            # Sum once for all variant_ids in the list, avoiding row-by-row slicing
             matrix_sum = variant[var_ids, :].X.sum(axis=0)

             if np.isscalar(matrix_sum):
                 matrix_sum = np.asarray(matrix_sum).reshape(1)

-            # Collect the non-zero values
+            # Collect non-zero values
             if matrix_sum.size == 1:
                 val = float(matrix_sum)
                 if val != 0:
@@ -985,7 +979,7 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
                     col_indices.append(col_idx + t_idx)
                     data_vals.append(float(v))

-    # Build the sparse matrix, then convert to csr
+    # Build sparse matrix, then convert to csr format
     overlap_sparse = sparse.csc_matrix(
         (data_vals, (row_indices, col_indices)),
         shape=(label_all_size, n_trait),
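For readers unfamiliar with this construction, a small self-contained sketch (toy data, not sciv code) of assembling a sparse matrix from (data, (row, col)) triplets and then converting it to CSR, which is the pattern the accumulated non-zero sums above feed into:

    from scipy import sparse

    # Toy triplets standing in for the accumulated non-zero peak-trait sums.
    row_indices = [0, 2, 4]
    col_indices = [1, 1, 2]
    data_vals = [3.0, 1.5, 2.0]

    # Assemble once from the triplets as CSC, then convert to CSR.
    mat = sparse.csc_matrix((data_vals, (row_indices, col_indices)), shape=(5, 3)).tocsr()
    print(mat.toarray())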
{sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/_random_walk_.py
@@ -201,7 +201,6 @@ def random_walk(
     if device == 'cpu' or (device == 'auto' and not availability):
         sample_count = seed_cell_weight.shape[1]

-        # Use joblib.Parallel with backend='threading' to keep the output order consistent with the input
         results = Parallel(n_jobs=n_jobs)(
             delayed(_random_walk_cpu_)(seed_cell_weight[:, i], weight, gamma, epsilon, p)
             for i in tqdm(range(sample_count))
@@ -690,7 +689,7 @@ class RandomWalk:
             seed_cell_matrix_en[:, i] = seed_cell_en_value / (1 if seed_cell_en_value.sum() == 0 else seed_cell_en_value.sum())

         # Parallel processing of all traits and real-time display of progress
-        Parallel(n_jobs=self.n_jobs)(
+        Parallel(n_jobs=self.n_jobs, backend='threading')(
             delayed(_process_single_trait)(i) for i in tqdm(self.trait_range, desc="Obtain progress of seed cells with weights")
         )
@@ -955,8 +954,8 @@ class RandomWalk:
             trait_cell_enrichment[:, i][cell_value_credible > self.credible_threshold] = 1
             trait_cell_credible[:, i] = cell_value_credible

-        # Process each trait in parallel
-        Parallel(n_jobs=self.n_jobs)(delayed(_process_trait)(i) for i in tqdm(self.trait_range))
+        # Process each trait in parallel; backend='threading' prevents the read-only parameter issue caused by copying in loky's multi-process mode
+        Parallel(n_jobs=self.n_jobs, backend='threading')(delayed(_process_trait)(i) for i in tqdm(self.trait_range))

         self.trs_adata.layers[_layer_label_] = to_sparse(trait_cell_enrichment)
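The switch to backend='threading' matters because the nested worker function writes into shared NumPy arrays in place. A rough standalone illustration (toy shapes and a hypothetical _fill_column helper, not sciv code):

    import numpy as np
    from joblib import Parallel, delayed

    results = np.zeros((4, 8))

    def _fill_column(i):
        # With backend='threading' the workers share this array, so in-place writes
        # are visible to the caller; with the default loky (process) backend each
        # worker would mutate its own copy and the writes would be lost.
        results[:, i] = i

    Parallel(n_jobs=2, backend="threading")(delayed(_fill_column)(i) for i in range(results.shape[1]))
    print(results[0])  # [0. 1. 2. 3. 4. 5. 6. 7.]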