sciv 0.0.89.tar.gz → 0.0.91.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. {sciv-0.0.89 → sciv-0.0.91}/PKG-INFO +1 -1
  2. {sciv-0.0.89 → sciv-0.0.91}/pyproject.toml +1 -1
  3. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_scvi_.py +10 -3
  4. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/_algorithm_.py +14 -20
  5. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/_random_walk_.py +3 -4
  6. {sciv-0.0.89 → sciv-0.0.91}/.gitignore +0 -0
  7. {sciv-0.0.89 → sciv-0.0.91}/LICENSE +0 -0
  8. {sciv-0.0.89 → sciv-0.0.91}/MANIFEST.in +0 -0
  9. {sciv-0.0.89 → sciv-0.0.91}/README.en.md +0 -0
  10. {sciv-0.0.89 → sciv-0.0.91}/README.md +0 -0
  11. {sciv-0.0.89 → sciv-0.0.91}/requirements.txt +0 -0
  12. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/__init__.py +0 -0
  13. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/file/__init__.py +0 -0
  14. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/file/_read_.py +0 -0
  15. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/file/_write_.py +0 -0
  16. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/model/__init__.py +0 -0
  17. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/model/_core_.py +0 -0
  18. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/__init__.py +0 -0
  19. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_bar_.py +0 -0
  20. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_barcode_.py +0 -0
  21. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_box_.py +0 -0
  22. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_bubble_.py +0 -0
  23. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_core_.py +0 -0
  24. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_graph_.py +0 -0
  25. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_heat_map_.py +0 -0
  26. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_kde_.py +0 -0
  27. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_line_.py +0 -0
  28. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_pie_.py +0 -0
  29. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_radar_.py +0 -0
  30. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_scatter_.py +0 -0
  31. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_venn_.py +0 -0
  32. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/plot/_violin_.py +0 -0
  33. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/__init__.py +0 -0
  34. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_anndata_.py +0 -0
  35. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_gencode_.py +0 -0
  36. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_gsea_.py +0 -0
  37. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_scanpy_.py +0 -0
  38. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_snapatac_.py +0 -0
  39. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/__init__.py +0 -0
  40. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/_matrix_.py +0 -0
  41. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/util/__init__.py +0 -0
  42. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/util/_constant_.py +0 -0
  43. {sciv-0.0.89 → sciv-0.0.91}/src/sciv/util/_core_.py +0 -0
  44. {sciv-0.0.89 → sciv-0.0.91}/tests/scivTest/__init__.py +0 -0
{sciv-0.0.89 → sciv-0.0.91}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: sciv
-Version: 0.0.89
+Version: 0.0.91
 Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
 Project-URL: github, https://github.com/YuZhengM/sciv
 Author-email: Zheng-Min Yu <yuzmbio@163.com>
{sciv-0.0.89 → sciv-0.0.91}/pyproject.toml
@@ -3,7 +3,7 @@ requires = ["hatchling"]
 build-backend = "hatchling.build"
 [project]
 name = "sciv"
-version = "0.0.89"
+version = "0.0.91"
 authors = [
     { name = "Zheng-Min Yu", email = "yuzmbio@163.com" },
 ]
{sciv-0.0.89 → sciv-0.0.91}/src/sciv/preprocessing/_scvi_.py
@@ -8,6 +8,7 @@ import numpy as np
 import pandas as pd

 from anndata import AnnData
+from torch import OutOfMemoryError

 from .. import util as ul
 from ..tool import umap, tsne
@@ -126,11 +127,17 @@ def poisson_vi(
     if model_dir is not None:
         if os.path.exists(os.path.join(model_dir, "model.pt")):
             ul.log(__name__).info(f"Due to the existence of file `model.pt`, it is loaded by default.")
+
             try:
                 model = scvi.external.POISSONVI.load(model_dir, adata=adata)
-            except FileExistsError as fee:
-                ul.log(__name__).error(f"File `model.pt` failed to load, you can execute `Poisson VI` again by deleting file `model.pt` ({model_dir}/model.pt).\n {fee}")
-                raise ValueError(f"File `model.pt` failed to load, you can execute `Poisson VI` again by deleting file `model.pt` ({model_dir}/model.pt).")
+            except OutOfMemoryError as ome:
+                ul.log(__name__).warning(f"GPU failed to run, try to switch to CPU running.\n {ome}")
+
+                try:
+                    model = scvi.external.POISSONVI.load(model_dir, adata=adata, accelerator="cpu", devices="cpu")
+                except Exception as e:
+                    ul.log(__name__).error(f"File `model.pt` failed to load, you can execute `Poisson VI` again by deleting file `model.pt` ({model_dir}/model.pt).\n {e}")
+                    raise ValueError(f"File `model.pt` failed to load, you can execute `Poisson VI` again by deleting file `model.pt` ({model_dir}/model.pt).")
         else:
             ul.file_method(__name__).makedirs(model_dir)
             model = __train__()
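For context, the fallback pattern the new code adopts can be sketched in isolation as below. This is a minimal illustration, not sciv code: the helper name is hypothetical, and the load calls simply mirror the ones in the hunk above (`from torch import OutOfMemoryError` requires a PyTorch release that exports this exception).

    import scvi
    from torch import OutOfMemoryError


    def load_poissonvi_with_cpu_fallback(model_dir, adata):
        # Hypothetical helper illustrating the GPU-to-CPU fallback shown above.
        try:
            # Default load; uses the GPU when one is available.
            return scvi.external.POISSONVI.load(model_dir, adata=adata)
        except OutOfMemoryError:
            # The GPU ran out of memory while loading; retry on the CPU,
            # mirroring the updated poisson_vi() branch.
            return scvi.external.POISSONVI.load(model_dir, adata=adata, accelerator="cpu", devices="cpu")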
{sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/_algorithm_.py
@@ -814,7 +814,9 @@ def _overlap_(regions_sort: DataFrame, variants: DataFrame) -> DataFrame:

     variants_overlap_info_list: list = []

-    for index, chr_a, start, end in zip(regions_sort["index"], regions_sort["chr"], regions_sort["start"],
+    for index, chr_a, start, end in zip(regions_sort["index"],
+                                        regions_sort["chr"],
+                                        regions_sort["start"],
                                         regions_sort["end"]):

         # judge chr
@@ -906,15 +908,15 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
     # Peak number
     label_all_size: int = len(label_all)

-    # Build the peaks index into a dict in advance for O(1) lookup
+    # Pre-build a dict of peak indices for O(1) lookup
     label2idx = {lb: i for i, lb in enumerate(label_all)}

     trait_names = trait_info["id"].tolist()
     n_trait = len(trait_names)
-    # Pre-allocate the sparse matrix, fill it column by column, and convert to csc then csr in one go at the end; this saves memory and is fast
+    # Pre-allocate sparse matrix, fill column by column, then convert to csc and then csr for efficiency
     row_indices, col_indices, data_vals = [], [], []

-    # Check column existence in a single pass
+    # Check column existence once
     required = {"chr", "start", "end"}

     if not required.issubset(regions.var.columns):
@@ -936,7 +938,7 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD

     ul.log(__name__).info("Obtain peak-trait/disease matrix. (overlap variant information)")

-    # The outer loop could be further accelerated by parallelizing over traits; keep a single loop for now
+    # The outer loop can be further accelerated by parallelizing over traits; here we keep it single-threaded for now.
     for col_idx, trait_name in enumerate(tqdm(trait_names)):
         variant: AnnData = variants[trait_name]
         overlap_df: DataFrame = _overlap_(regions_df, variant.obs)
@@ -944,34 +946,26 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
         if overlap_df.empty:
             continue

-        # Get the label -> variant_id lists directly, skipping groupby
-        overlap_df = overlap_df.rename(columns={"index": "label"})
-        # Map labels to row indices
-        overlap_df = overlap_df[overlap_df["label"].isin(label2idx)]
-
-        if overlap_df.empty:
-            continue
-
-        # Sum in one pass: first group by label and collect variant_id into lists
+        # Sum at once: first group by label and collect variant_id into a list
         label_var_ids = (
             overlap_df
-            .groupby("label")["variant_id"]
+            .groupby("index")["variant_id"]
             .apply(list)
             .reset_index()
         )

-        # Iterate over each label and sum with a single slice
+        # Traverse each label, sum once for each variant_id list
         for _, row in label_var_ids.iterrows():
-            label = row["label"]
+            label = row["index"]
             row_idx = label2idx[label]
             var_ids = row["variant_id"]
-            # Slice once and sum, avoiding row-by-row slicing
+            # Sum once for all variant_ids in the list, avoiding row-by-row slicing
             matrix_sum = variant[var_ids, :].X.sum(axis=0)

             if np.isscalar(matrix_sum):
                 matrix_sum = np.asarray(matrix_sum).reshape(1)

-            # Collect the non-zero values
+            # Collect non-zero values
             if matrix_sum.size == 1:
                 val = float(matrix_sum)
                 if val != 0:
@@ -985,7 +979,7 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
                     col_indices.append(col_idx + t_idx)
                     data_vals.append(float(v))

-    # Build the sparse matrix, then convert to csr
+    # Build sparse matrix, then convert to csr format
     overlap_sparse = sparse.csc_matrix(
         (data_vals, (row_indices, col_indices)),
         shape=(label_all_size, n_trait),
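For readers unfamiliar with this construction, a small self-contained sketch (toy data, not sciv code) of assembling a sparse matrix from (data, (row, col)) triplets and then converting it to CSR, which is the pattern the accumulated non-zero sums above feed into:

    from scipy import sparse

    # Toy triplets standing in for the accumulated non-zero peak-trait sums.
    row_indices = [0, 2, 4]
    col_indices = [1, 1, 2]
    data_vals = [3.0, 1.5, 2.0]

    # Assemble once from the triplets as CSC, then convert to CSR.
    mat = sparse.csc_matrix((data_vals, (row_indices, col_indices)), shape=(5, 3)).tocsr()
    print(mat.toarray())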
{sciv-0.0.89 → sciv-0.0.91}/src/sciv/tool/_random_walk_.py
@@ -201,7 +201,6 @@ def random_walk(
     if device == 'cpu' or (device == 'auto' and not availability):
         sample_count = seed_cell_weight.shape[1]

-        # Use joblib.Parallel with backend='threading' to keep the output order consistent with the input
         results = Parallel(n_jobs=n_jobs)(
             delayed(_random_walk_cpu_)(seed_cell_weight[:, i], weight, gamma, epsilon, p)
             for i in tqdm(range(sample_count))
@@ -690,7 +689,7 @@ class RandomWalk:
             seed_cell_matrix_en[:, i] = seed_cell_en_value / (1 if seed_cell_en_value.sum() == 0 else seed_cell_en_value.sum())

         # Parallel processing of all traits and real-time display of progress
-        Parallel(n_jobs=self.n_jobs)(
+        Parallel(n_jobs=self.n_jobs, backend='threading')(
             delayed(_process_single_trait)(i) for i in tqdm(self.trait_range, desc="Obtain progress of seed cells with weights")
         )
@@ -955,8 +954,8 @@ class RandomWalk:
             trait_cell_enrichment[:, i][cell_value_credible > self.credible_threshold] = 1
             trait_cell_credible[:, i] = cell_value_credible

-        # Process each trait in parallel
-        Parallel(n_jobs=self.n_jobs)(delayed(_process_trait)(i) for i in tqdm(self.trait_range))
+        # Process each trait in parallel; backend='threading' prevents the read-only parameter issue caused by copying in loky's multi-process mode
+        Parallel(n_jobs=self.n_jobs, backend='threading')(delayed(_process_trait)(i) for i in tqdm(self.trait_range))

         self.trs_adata.layers[_layer_label_] = to_sparse(trait_cell_enrichment)
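The switch to backend='threading' matters because the nested worker function writes into shared NumPy arrays in place. A rough standalone illustration (toy shapes and a hypothetical _fill_column helper, not sciv code):

    import numpy as np
    from joblib import Parallel, delayed

    results = np.zeros((4, 8))

    def _fill_column(i):
        # With backend='threading' the workers share this array, so in-place writes
        # are visible to the caller; with the default loky (process) backend each
        # worker would mutate its own copy and the writes would be lost.
        results[:, i] = i

    Parallel(n_jobs=2, backend="threading")(delayed(_fill_column)(i) for i in range(results.shape[1]))
    print(results[0])  # [0. 1. 2. 3. 4. 5. 6. 7.]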