sciv 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sciv/model/_core_.py CHANGED
@@ -15,7 +15,7 @@ from ..tool import RandomWalk, overlap_sum, obtain_cell_cell_network, calculate_
15
15
 
16
16
  from ..file import save_h5ad, save_pkl, read_h5ad, read_pkl
17
17
  from ..preprocessing import filter_data, poisson_vi
18
- from ..util import path, enrichment_optional, to_dense, collection, to_sparse, difference_peak_optional
18
+ from ..util import path, enrichment_optional, to_dense, collection, to_sparse, difference_peak_optional, project_name
19
19
 
20
20
  __name__: str = "model_core"
21
21
 
@@ -81,7 +81,7 @@ def core(
81
81
  weight: float = 0.1,
82
82
  kernel: Literal["laplacian", "gaussian"] = "gaussian",
83
83
  local_k: int = 10,
84
- kernel_gamma: Optional[float, collection] = None,
84
+ kernel_gamma: Optional[Union[float, collection]] = None,
85
85
  epsilon: float = 1e-05,
86
86
  gamma: float = 0.05,
87
87
  enrichment_gamma: float = 0.05,
@@ -453,7 +453,7 @@ def core(
453
453
  model_dir=model_dir
454
454
  )
455
455
 
456
- step1_time = adata.uns["elapsed_time"] + da_peaks.uns["elapsed_time"]
456
+ poisson_vi_time = adata.uns["elapsed_time"] + da_peaks.uns["elapsed_time"]
457
457
 
458
458
  if save_path is not None:
459
459
 
@@ -481,11 +481,11 @@ def core(
481
481
  )
482
482
 
483
483
  else:
484
- overlap_adata: AnnData = overlap_sum(adata, variants, trait_info)
484
+ overlap_adata: AnnData = overlap_sum(adata, variants, trait_info, n_jobs=n_jobs)
485
485
 
486
486
  del variants, trait_info
487
487
 
488
- step2_time = overlap_adata.uns["elapsed_time"]
488
+ overlap_time = overlap_adata.uns["elapsed_time"]
489
489
 
490
490
  if save_path is not None and not overlap_is_read:
491
491
  save_h5ad(overlap_adata, file=atac_overlap_save_file)
@@ -518,7 +518,7 @@ def core(
518
518
 
519
519
  del da_peaks, overlap_adata
520
520
 
521
- step3_time = init_score.uns["elapsed_time"]
521
+ init_score_time = init_score.uns["elapsed_time"]
522
522
 
523
523
  if save_path is not None and not init_score_is_read:
524
524
  save_h5ad(init_score, file=init_score_save_file)
@@ -546,13 +546,13 @@ def core(
546
546
 
547
547
  del adata
548
548
 
549
- step4_time = cc_data.uns["elapsed_time"]
549
+ smknn_time = cc_data.uns["elapsed_time"]
550
550
 
551
551
  if save_path is not None and not cc_data_is_read:
552
552
  save_h5ad(cc_data, file=cc_data_save_file)
553
553
 
554
554
  """
555
- 5. Random walk
555
+ 5. Random walk with weighted seed cells
556
556
  """
557
557
 
558
558
  random_walk_is_read: bool = is_file_exist_loading and os.path.exists(random_walk_save_file) and is_save_random_walk_model
@@ -584,17 +584,29 @@ def core(
584
584
  del random_walk_is_read, init_score, cc_data
585
585
 
586
586
  trs = _run_random_walk_(random_walk, is_ablation, is_simple)
587
+ trs.uns["params"] = params
588
+
589
+ del params
587
590
 
588
- step5_time = random_walk.elapsed_time
591
+ random_walk_time = random_walk.elapsed_time
589
592
 
590
593
  # end time
591
594
  elapsed_time = time.time() - start_time
592
- step_time = step1_time + step2_time + step3_time + step4_time + step5_time
595
+ step_time = poisson_vi_time + overlap_time + init_score_time + smknn_time + random_walk_time
593
596
 
594
- params.update({"elapsed_time": elapsed_time if elapsed_time > step_time else step_time})
595
- trs.uns["params"] = params
597
+ if elapsed_time < step_time:
598
+ elapsed_time = step_time
596
599
 
597
- del params
600
+ ul.log(__name__).info(f"Algorithm {project_name} consumes a total of {elapsed_time} seconds.")
601
+
602
+ trs.uns["elapsed_time"] = {
603
+ "PoissonVI": poisson_vi_time,
604
+ "Overlap": overlap_time,
605
+ "initial TRS": init_score_time,
606
+ "SM-kNN": smknn_time,
607
+ "Random walk": random_walk_time,
608
+ "Total time": elapsed_time
609
+ }
598
610
 
599
611
  if save_path is not None:
600
612
  save_h5ad(trs, file=trs_save_file)
sciv/tool/_algorithm_.py CHANGED
@@ -7,6 +7,8 @@ from typing import Union, Tuple, Literal, Optional
7
7
  from scipy import sparse
8
8
  from scipy.stats import norm
9
9
  from tqdm import tqdm
10
+ from joblib import Parallel, delayed
11
+ import multiprocessing
10
12
 
11
13
  import numpy as np
12
14
  from anndata import AnnData
@@ -516,7 +518,7 @@ def semi_mutual_knn_weight(
516
518
  adj_weight = (1 - weight) * adj_and.astype(np.float32) + weight * adj_or.astype(np.float32)
517
519
 
518
520
  # Ensure full connectivity if required
519
- if is_mknn_fully_connected and (or_neighbors == 0 or weight == 0):
521
+ if is_mknn_fully_connected:
520
522
  adj_1nn = _knn(new_data, 1)
521
523
 
522
524
  if sparse.issparse(adj_and):
@@ -824,16 +826,17 @@ def _overlap_(regions_sort: DataFrame, variants: DataFrame) -> DataFrame:
824
826
  if chr_a in chr_keys:
825
827
  # get chr variant
826
828
  variants_chr_type_position_list = variants_position_list[chr_a]
829
+
827
830
  # judge start and end position
828
831
  if start <= variants_chr_type_position_list[-1] and end >= variants_chr_type_position_list[0]:
829
832
  # get index
830
- start_index = get_index(start, variants_chr_type_position_list)
831
- end_index = get_index(end, variants_chr_type_position_list)
833
+ start_index = get_index(start, variants_chr_type_position_list, False)
834
+ end_index = get_index(end, variants_chr_type_position_list, False)
832
835
 
833
836
  # Check whether the two indices are equal; equal indices mean there is no overlap
834
837
  if start_index != end_index:
835
- start_index = start_index if isinstance(start_index, number) else start_index[1]
836
- end_index = end_index + 1 if isinstance(end_index, number) else end_index[1]
838
+ start_index = start_index if isinstance(start_index, int) else start_index[1]
839
+ end_index = end_index + 1 if isinstance(end_index, int) else end_index[1]
837
840
 
838
841
  if start_index > end_index:
839
842
  ul.log(__name__).error("The end index in the region is greater than the start index.")
@@ -894,13 +897,14 @@ def overlap(regions: DataFrame, variants: DataFrame) -> DataFrame:
894
897
  return _overlap_(regions_sort, variants)
895
898
 
896
899
 
897
- def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnData:
900
+ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame, n_jobs: int = -1) -> AnnData:
898
901
  """
899
902
  Overlap regional data and mutation data and sum the PP values of all mutations in a region as the values for that
900
903
  region.
901
904
  :param regions: peaks data
902
905
  :param variants: variants data
903
906
  :param trait_info: traits information
907
+ :param n_jobs: The maximum number of concurrently running jobs
904
908
  :return: overlap data
905
909
  """
906
910
 
@@ -916,8 +920,6 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
916
920
 
917
921
  trait_names = trait_info["id"].tolist()
918
922
  n_trait = len(trait_names)
919
- # Pre-allocate sparse matrix, fill column by column, then convert to csc and then csr for efficiency
920
- row_indices, col_indices, data_vals = [], [], []
921
923
 
922
924
  # Check column existence once
923
925
  required = {"chr", "start", "end"}
@@ -941,13 +943,18 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
941
943
 
942
944
  ul.log(__name__).info("Obtain peak-trait/disease matrix. (overlap variant information)")
943
945
 
944
- # The outer loop can be further accelerated by parallelizing over traits; here we keep it single-threaded for now.
945
- for col_idx, trait_name in enumerate(tqdm(trait_names)):
946
+ # Function to process a single trait
947
+ def _process_trait_(trait_name, col_idx):
948
+
949
+ local_data_vals = []
950
+ local_row_indices = []
951
+ local_col_indices = []
952
+
946
953
  variant: AnnData = variants[trait_name]
947
954
  overlap_df: DataFrame = _overlap_(regions_df, variant.obs)
948
955
 
949
956
  if overlap_df.empty:
950
- continue
957
+ return local_data_vals, local_row_indices, local_col_indices
951
958
 
952
959
  # Sum at once: first group by label and collect variant_id into a list
953
960
  label_var_ids = (
@@ -972,15 +979,37 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
972
979
  if matrix_sum.size == 1:
973
980
  val = float(matrix_sum)
974
981
  if val != 0:
975
- row_indices.append(row_idx)
976
- col_indices.append(col_idx)
977
- data_vals.append(val)
982
+ local_row_indices.append(row_idx)
983
+ local_col_indices.append(col_idx)
984
+ local_data_vals.append(val)
978
985
  else:
979
986
  for t_idx, v in enumerate(matrix_sum):
980
987
  if v != 0:
981
- row_indices.append(row_idx)
982
- col_indices.append(col_idx + t_idx)
983
- data_vals.append(float(v))
988
+ local_row_indices.append(row_idx)
989
+ local_col_indices.append(col_idx + t_idx)
990
+ local_data_vals.append(float(v))
991
+
992
+ return local_data_vals, local_row_indices, local_col_indices
993
+
994
+ # Use Parallel to process traits in parallel
995
+ results = Parallel(n_jobs=n_jobs)(
996
+ delayed(_process_trait_)(trait_name, col_idx) for col_idx, trait_name in tqdm(enumerate(trait_names))
997
+ )
998
+
999
+ # Preallocate the output arrays at their final length to avoid repeated list growth
1000
+ total = sum(len(ld) for ld, _, _ in results)
1001
+ row_indices = np.empty(total, dtype=np.int32)
1002
+ col_indices = np.empty(total, dtype=np.int32)
1003
+ data_vals = np.empty(total, dtype=np.float32)
1004
+
1005
+ ptr = 0
1006
+
1007
+ for local_data, local_rows, local_cols in results:
1008
+ n = len(local_data)
1009
+ row_indices[ptr:ptr+n] = local_rows
1010
+ col_indices[ptr:ptr+n] = local_cols
1011
+ data_vals[ptr:ptr+n] = local_data
1012
+ ptr += n
984
1013
 
985
1014
  # Build sparse matrix, then convert to csr format
986
1015
  overlap_sparse = sparse.csc_matrix(
@@ -1205,10 +1234,18 @@ def calculate_init_score_weight(
1205
1234
 
1206
1235
  ul.log(__name__).info("Calculate initial trait relevance scores")
1207
1236
  _init_trs_weight_ = np.multiply(_init_trs_ncw_, _cell_type_weight_)
1237
+
1238
+ if hasattr(_init_trs_weight_, "A"):
1239
+ _init_trs_weight_ = _init_trs_weight_.A
1240
+
1208
1241
  init_trs_adata = AnnData(_init_trs_weight_, obs=cell_anno, var=trait_anno)
1209
1242
  del _init_trs_weight_
1210
1243
 
1211
1244
  if not is_simple:
1245
+
1246
+ if hasattr(_init_trs_ncw_, "A"):
1247
+ _init_trs_ncw_ = _init_trs_ncw_.A
1248
+
1212
1249
  init_trs_adata.layers["init_trs_ncw"] = _init_trs_ncw_
1213
1250
  init_trs_adata.layers["cell_type_weight"] = to_sparse(_cell_type_weight_)
1214
1251
  init_trs_adata.uns["cluster_weight_factor"] = da_peaks_adata.obsm["cluster_weight"]
@@ -1253,7 +1290,7 @@ def obtain_cell_cell_network(
1253
1290
  weight: float = 0.1,
1254
1291
  kernel: Literal["laplacian", "gaussian"] = "gaussian",
1255
1292
  local_k: int = 10,
1256
- gamma: Optional[float, collection] = None,
1293
+ gamma: Optional[Union[float, collection]] = None,
1257
1294
  is_simple: bool = True
1258
1295
  ) -> AnnData:
1259
1296
  """
sciv/util/_core_.py CHANGED
@@ -90,17 +90,20 @@ def sum_min_max(data: matrix_data, axis: int = 1) -> Tuple[number, number]:
90
90
  return min(rows_sum), max(rows_sum)
91
91
 
92
92
 
93
- def get_index(position: number, positions_list: list) -> Union[number, Tuple[number, number]]:
93
+ def get_index(position: number, positions_list: list, is_sort: bool = True) -> Union[int, Tuple[int, int]]:
94
94
  """
95
95
  Search for position information. Similar to half search.
96
96
  If the position exists in the list, return the index.
97
97
  If it does not exist, return the index located between the two indexes
98
98
  :param position: position
99
99
  :param positions_list: position list
100
+ :param is_sort: whether to sort positions_list in place before searching (default True)
100
101
  :return: position index
101
102
  """
102
- # sort
103
- positions_list.sort()
103
+
104
+ if is_sort:
105
+ positions_list.sort()
106
+
104
107
  # search
105
108
  position_size: int = len(positions_list)
106
109
  left, right = 0, position_size - 1
@@ -441,7 +444,6 @@ def plot_end(
441
444
  close: bool = False,
442
445
  dpi: float = 300
443
446
  ):
444
-
445
447
  if title is not None:
446
448
  plt.title(title)
447
449
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sciv
3
- Version: 0.0.96
3
+ Version: 0.0.98
4
4
  Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
5
5
  Project-URL: github, https://github.com/YuZhengM/sciv
6
6
  Author-email: Zheng-Min Yu <yuzmbio@163.com>
@@ -3,7 +3,7 @@ sciv/file/__init__.py,sha256=8cYLG0S0nilblmyX46CWFrbLr-rmLbO1EEO477pZ-gk,520
3
3
  sciv/file/_read_.py,sha256=UZJpN3_5hBiTjzEYO6YXORcE_dqA8HmLpV80nqTLNSo,30554
4
4
  sciv/file/_write_.py,sha256=W3M9CmPi7BuKAffz1fdi-vA5DzAFZ7wmcggp33N9Xtg,7848
5
5
  sciv/model/__init__.py,sha256=k8SO9FpJaGn2ANqJyaz3HXMas7jH9toPVtpw703kOqg,149
6
- sciv/model/_core_.py,sha256=3GjKG5w-cTBF53LpSrFrMFnqwtgMI2_BkSlLGFMMMj8,33255
6
+ sciv/model/_core_.py,sha256=HCXPFOxfed5TUJMI-pZV9cx9_2R8z4dNPb2f7Hp3mVc,33693
7
7
  sciv/plot/__init__.py,sha256=2tRNT6TZNz9r38lnna712RGsH7OJ2QkGa37XKgzejHQ,1865
8
8
  sciv/plot/_bar_.py,sha256=xWpFbJTHgQMLuUSXa4uE69RGHXSCGinapxL-1imdDcU,14355
9
9
  sciv/plot/_barcode_.py,sha256=RDOedQ8ZtXWFyJ2c772RDfqO4TMIpHMvcMZMAVqky90,5073
@@ -27,13 +27,13 @@ sciv/preprocessing/_scanpy_.py,sha256=tB8BD2wpLAU8_YxdqrgNtcjpNXNRo-JCdm2lxaKDBL
27
27
  sciv/preprocessing/_scvi_.py,sha256=7QxwPA2kR_g15X28aEak7AFA4kyQ-UbtpiLH-rc5Ksg,10780
28
28
  sciv/preprocessing/_snapatac_.py,sha256=Dq8CHF7Psl3CQszaEokQYO56Oe2uzyWOy_cGlaOywfc,27798
29
29
  sciv/tool/__init__.py,sha256=WXzHkWt6RgBC3qqD-98nR5wQmt6oC850ox_VpMrapSU,2468
30
- sciv/tool/_algorithm_.py,sha256=uzA__knaIgIMcnFK-JwKT4sIbCETh94eYH-2WB_BpXc,52728
30
+ sciv/tool/_algorithm_.py,sha256=mYKfSuYGelLd2secwyqGPxBQYd3x2yDKw1z7HK8mqYE,53773
31
31
  sciv/tool/_matrix_.py,sha256=SnC3sXic_ufuEXStcD_HncvYH6apBdNK6nhG6jFLmjA,24324
32
32
  sciv/tool/_random_walk_.py,sha256=JOB97XLxlZYHvlIST1wlXgA0mw6fybkWnJGq6X_kbsk,48871
33
33
  sciv/util/__init__.py,sha256=nOxZ8if27X7AUJ6hZwTwxOJwIBJb0obWlHjqCzjg_Gc,1964
34
34
  sciv/util/_constant_.py,sha256=w0wKQd8guLd1ZTW24_5aECrWsIWDiNQmEpLsWlHar1A,3000
35
- sciv/util/_core_.py,sha256=hF33ybPcoVlapZsm-2Etem-p_rQUqXlsdaQgZv5jD7w,14867
36
- sciv-0.0.96.dist-info/METADATA,sha256=xLbV5NRQL7Q3aA25Peb8Idk1PjZ7rM6yXlJsH7VM_OI,3465
37
- sciv-0.0.96.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
38
- sciv-0.0.96.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
39
- sciv-0.0.96.dist-info/RECORD,,
35
+ sciv/util/_core_.py,sha256=TUWfBNRJzWuoQ9ffew_DjnlkNydG-Rmujl_RH4Ln9io,14917
36
+ sciv-0.0.98.dist-info/METADATA,sha256=yZiAx2el-OqapNmZNjUpjAmTAmQbIcBIZa7QFXjYPYc,3465
37
+ sciv-0.0.98.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
38
+ sciv-0.0.98.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
39
+ sciv-0.0.98.dist-info/RECORD,,
File without changes