sciv 0.0.82__py3-none-any.whl → 0.0.84__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sciv/tool/_algorithm_.py CHANGED
@@ -14,7 +14,6 @@ from pandas import DataFrame
14
14
 
15
15
  from ._matrix_ import (
16
16
  matrix_dot_block_storage,
17
- matrix_operation_memory_efficient,
18
17
  vector_multiply_block_storage
19
18
  )
20
19
 
@@ -8,6 +8,7 @@ import torch.nn as nn
8
8
 
9
9
  from torch import Tensor
10
10
  from tqdm import tqdm
11
+ from joblib import Parallel, delayed
11
12
 
12
13
  import numpy as np
13
14
  from anndata import AnnData
@@ -23,14 +24,15 @@ from ..util import (
23
24
  collection,
24
25
  check_adata_get,
25
26
  enrichment_optional,
26
- check_gpu_availability
27
+ check_gpu_availability,
28
+ sparse_data
27
29
  )
28
30
 
29
31
  __name__: str = "tool_random_walk"
30
32
 
31
33
 
32
34
  def _random_walk_cpu_(
33
- seed_cell_vector: collection,
35
+ seed_cell_vector: Union[list, np.ndarray, np.matrix],
34
36
  weight: matrix_data = None,
35
37
  gamma: float = 0.05,
36
38
  epsilon: float = 1e-5,
@@ -46,17 +48,19 @@ def _random_walk_cpu_(
46
48
  :return: The value after random walk.
47
49
  """
48
50
 
49
- w = to_dense(weight)
50
-
51
51
  # Random walk
52
- p0 = seed_cell_vector.copy()[:, np.newaxis]
52
+ p0 = np.asarray(seed_cell_vector, dtype=float).ravel()[:, np.newaxis]
53
53
  pt: matrix_data = p0.copy()
54
54
  k = 0
55
55
  delta = 1
56
56
 
57
57
  # iteration
58
58
  while delta > epsilon:
59
- p1 = (1 - gamma) * np.dot(w, pt) + gamma * p0
59
+
60
+ if hasattr(weight, "dot"):
61
+ p1 = (1 - gamma) * weight.dot(pt) + gamma * p0
62
+ else:
63
+ p1 = (1 - gamma) * np.dot(weight, pt) + gamma * p0
60
64
 
61
65
  # 1 and 2, It would be faster alone
62
66
  if p == 1:
@@ -297,7 +301,6 @@ class RandomWalk:
297
301
 
298
302
  init_status.obs["clusters"] = init_status.obs["clusters"].astype(str)
299
303
 
300
- self.cc_adata = cc_adata
301
304
  self.epsilon = epsilon
302
305
  self.gamma = gamma
303
306
  self.enrichment_gamma = enrichment_gamma
@@ -390,10 +393,12 @@ class RandomWalk:
390
393
  self.random_seed_cell = np.zeros(init_status.shape)
391
394
 
392
395
  # Transition Probability Matrix
393
- self.weight = self._get_weight_(self.cc_adata.X)
396
+ self.weight = self._get_weight_(cc_adata.X)
394
397
 
395
398
  if not is_simple and self.is_ablation:
396
- self.weight_m_knn = self._get_weight_(self.cc_adata.layers["cell_mutual_knn"])
399
+ self.weight_m_knn = self._get_weight_(cc_adata.layers["cell_mutual_knn"])
400
+
401
+ del cc_adata
397
402
 
398
403
  self.cluster_types, self.init_seed_cell_size = self._get_cluster_info_()
399
404
 
@@ -419,6 +424,9 @@ class RandomWalk:
419
424
  self.seed_cell_weight_en_ncw
420
425
  ) = self._get_seed_cell_(init_data=init_status_no_weight, info="ablation")
421
426
 
427
+ del self.cell_affinity
428
+ del init_status
429
+
422
430
  def _random_walk_(
423
431
  self,
424
432
  seed_cell_data: matrix_data,
@@ -461,7 +469,7 @@ class RandomWalk:
461
469
  return self._random_walk_(seed_cell_data, weight, self.gamma)
462
470
 
463
471
  @staticmethod
464
- def _get_weight_(cell_cell_matrix: matrix_data) -> matrix_data:
472
+ def _get_weight_(cell_cell_matrix: matrix_data) -> sparse_data:
465
473
  """
466
474
  Obtain weights in random walk
467
475
  :param cell_cell_matrix: Cell to cell connectivity matrix
@@ -472,7 +480,7 @@ class RandomWalk:
472
480
  data_weight = to_dense(cell_cell_matrix, is_array=True)
473
481
  cell_sum_weight = data_weight.sum(axis=1)[:, np.newaxis]
474
482
  cell_sum_weight[cell_sum_weight == 0] = 1
475
- return data_weight / cell_sum_weight
483
+ return to_sparse(data_weight / cell_sum_weight)
476
484
 
477
485
  def _get_cell_weight_(self, seed_cell_size: int) -> matrix_data:
478
486
  _cell_cell_knn_: matrix_data = self.cell_affinity.copy()
@@ -592,72 +600,117 @@ class RandomWalk:
592
600
  if init_data is None:
593
601
  init_data = self.init_status
594
602
 
595
- # seed cell threshold
596
- seed_cell_count: collection = np.zeros(len(self.trait_list)).astype(int)
597
- seed_cell_threshold: collection = np.zeros(len(self.trait_list))
598
- seed_cell_weight: matrix_data = np.zeros(self.trs_adata.shape)
599
- seed_cell_index: matrix_data = np.zeros(self.trs_adata.shape)
600
- seed_cell_weight_en: matrix_data = np.zeros(self.trs_adata.shape)
603
+ n_traits = len(self.trait_list)
604
+ n_cells = self.cell_size
605
+
606
+ seed_cell_count = np.zeros(n_traits, dtype=int)
607
+ seed_cell_threshold = np.zeros(n_traits)
608
+ seed_cell_weight = np.zeros((n_cells, n_traits))
609
+ seed_cell_index = np.zeros((n_cells, n_traits), dtype=int)
610
+ seed_cell_weight_en = np.zeros((n_cells, n_traits))
601
611
 
602
612
  if not self.is_simple:
603
- seed_cell_matrix: matrix_data = np.zeros(self.trs_adata.shape)
604
- seed_cell_matrix_en: matrix_data = np.zeros(self.trs_adata.shape)
613
+ seed_cell_matrix = np.zeros((n_cells, n_traits))
614
+ seed_cell_matrix_en = np.zeros((n_cells, n_traits))
605
615
  else:
606
- seed_cell_matrix: matrix_data = np.zeros((1, 1))
607
- seed_cell_matrix_en: matrix_data = np.zeros((1, 1))
616
+ seed_cell_matrix = np.zeros((1, 1))
617
+ seed_cell_matrix_en = np.zeros((1, 1))
618
+
619
+ ul.log(__name__).info(f"Calculate {n_traits} traits/diseases for seed cells information.{f' ({info})' if info else ''}")
620
+
621
+ trait_values_all = to_dense(init_data.X, is_array=True)
622
+
623
+ def _process_single_trait(i: int) -> dict:
624
+ trait_value = trait_values_all[:, i]
625
+ trait_value_max = trait_value.max()
626
+ trait_value_min = trait_value.min()
627
+
628
+ if trait_value_min == trait_value_max:
629
+ return dict(
630
+ seed_cell_count=0,
631
+ seed_cell_threshold=0.0,
632
+ seed_cell_index=None,
633
+ seed_cell_weight=None,
634
+ seed_cell_en_index=None,
635
+ seed_cell_en_weight=None,
636
+ seed_cell_matrix=None,
637
+ seed_cell_matrix_en=None
638
+ )
608
639
 
609
- ul.log(__name__).info(f"Calculate {len(self.trait_list)} traits/diseases for seed cells information.{f' ({info})' if info is not None else ''}")
610
- for i in tqdm(self.trait_range):
640
+ # 直接获取降序索引
641
+ trait_value_sort_index = np.argpartition(trait_value, -trait_value.size)[::-1]
642
+
643
+ # 计算 >0 的细胞数
644
+ _gt0_cell_size = (trait_value > 0).sum()
611
645
 
612
- # Obtain all cell score values in a trait
613
- trait_adata: AnnData = init_data[:, i]
614
- trait_value: collection = to_dense(trait_adata.X, is_array=True).flatten()
646
+ _seed_cell_size = self._get_seed_cell_size_(_gt0_cell_size)
615
647
 
616
- # Obtain the maximum initial score
617
- trait_value_max = np.max(trait_value)
618
- trait_value_min = np.min(trait_value)
648
+ # 设置种子细胞索引与权重
649
+ _seed_cell_index = trait_value_sort_index[:_seed_cell_size]
650
+ _seed_cell_weight = np.zeros(n_cells)
651
+ _seed_cell_weight[_seed_cell_index] = self._get_seed_cell_weight_(
652
+ seed_cell_index=_seed_cell_index, value=trait_value
653
+ )
619
654
 
620
- if trait_value_min != trait_value_max:
655
+ # 富集区间索引
656
+ _enrichment_start = _seed_cell_size
657
+ _enrichment_end = min(2 * _seed_cell_size, self.cell_size - 1)
621
658
 
622
- # Obtain a cell count greater than zero
623
- trait_value_sort_index = np.argsort(trait_value).astype(int)
624
- trait_value_sort_index = trait_value_sort_index[::-1]
625
- _gto_cell_index_ = trait_value > 0
626
- _gt0_cell_size_ = trait_value[_gto_cell_index_].size
659
+ if _gt0_cell_size == _seed_cell_size:
660
+ _enrichment_start = max(_seed_cell_size - self._enrichment_seed_cell_min_count_, 0)
661
+ _enrichment_end = _seed_cell_size
627
662
 
628
- _seed_cell_size_ = self._get_seed_cell_size_(_gt0_cell_size_)
663
+ _seed_cell_en_index = trait_value_sort_index[_enrichment_start:_enrichment_end]
664
+ _seed_cell_en_weight = np.zeros(n_cells)
665
+ _tmp_weight = self._get_seed_cell_weight_(
666
+ seed_cell_index=_seed_cell_index if len(_seed_cell_en_index) == len(_seed_cell_index) else _seed_cell_en_index,
667
+ value=trait_value,
668
+ seed_cell_index_enrichment=_seed_cell_en_index
669
+ )
670
+ _seed_cell_en_weight[_seed_cell_en_index] = _tmp_weight
671
+
672
+ # 无权重版本(仅在需要时计算)
673
+ _seed_cell_matrix = None
674
+ _seed_cell_matrix_en = None
675
+
676
+ if not self.is_simple and self.is_ablation:
677
+ seed_cell_value = np.zeros(n_cells)
678
+ seed_cell_value[_seed_cell_index] = 1
679
+ _seed_cell_matrix = seed_cell_value / seed_cell_value.sum() if seed_cell_value.sum() else 0
680
+
681
+ seed_cell_en_value = np.zeros(n_cells)
682
+ seed_cell_en_value[_seed_cell_en_index] = 1
683
+ _seed_cell_matrix_en = seed_cell_en_value / seed_cell_en_value.sum() if seed_cell_en_value.sum() else 0
684
+
685
+ return dict(
686
+ seed_cell_count=_seed_cell_size,
687
+ seed_cell_threshold=trait_value[trait_value_sort_index[_seed_cell_size]],
688
+ seed_cell_index=_seed_cell_index,
689
+ seed_cell_weight=_seed_cell_weight,
690
+ seed_cell_en_index=_seed_cell_en_index,
691
+ seed_cell_en_weight=_seed_cell_en_weight,
692
+ seed_cell_matrix=_seed_cell_matrix,
693
+ seed_cell_matrix_en=_seed_cell_matrix_en
694
+ )
629
695
 
630
- seed_cell_count[i] = _seed_cell_size_
631
- seed_cell_threshold[i] = trait_value[trait_value_sort_index[_seed_cell_size_]]
696
+ # 并行处理所有 trait
697
+ results = Parallel(n_jobs=-1, backend="threading")(delayed(_process_single_trait)(i) for i in self.trait_range)
632
698
 
633
- # Set seed cell weights (reduce noise seed cell weights)
634
- _seed_cell_index_ = trait_value_sort_index[0:_seed_cell_size_]
635
- seed_cell_index[:, i][_seed_cell_index_] = 1
636
- seed_cell_weight[:, i][_seed_cell_index_] = self._get_seed_cell_weight_(seed_cell_index=_seed_cell_index_, value=trait_value)
699
+ # 将并行结果写回对应数组
700
+ for i, res in enumerate(results):
637
701
 
638
- _enrichment_start_index_: int = _seed_cell_size_
639
- _enrichment_end_index_: int = 2 * _seed_cell_size_ if self.cell_size > 2 * _seed_cell_size_ else _seed_cell_size_ - 1
702
+ if res["seed_cell_index"] is None:
703
+ continue
640
704
 
641
- if _gt0_cell_size_ == _seed_cell_size_:
642
- _enrichment_start_index_ = int(_seed_cell_size_ - self._enrichment_seed_cell_min_count_) if _seed_cell_size_ > self._enrichment_seed_cell_min_count_ else (
643
- (_seed_cell_size_ - 1) if _seed_cell_size_ > 2 else 0)
644
- _enrichment_end_index_ = _seed_cell_size_
705
+ seed_cell_count[i] = res["seed_cell_count"]
706
+ seed_cell_threshold[i] = res["seed_cell_threshold"]
707
+ seed_cell_index[res["seed_cell_index"], i] = 1
708
+ seed_cell_weight[:, i] = res["seed_cell_weight"]
709
+ seed_cell_weight_en[res["seed_cell_en_index"], i] = res["seed_cell_en_weight"]
645
710
 
646
- _seed_cell_en_index_ = trait_value_sort_index[_enrichment_start_index_:_enrichment_end_index_]
647
- _seed_cell_en_weight_ = self._get_seed_cell_weight_(
648
- seed_cell_index=_seed_cell_index_ if len(_seed_cell_en_index_) == len(_seed_cell_index_) else _seed_cell_en_index_, value=trait_value,
649
- seed_cell_index_enrichment=_seed_cell_en_index_
650
- )
651
- seed_cell_weight_en[:, i][_seed_cell_en_index_] = _seed_cell_en_weight_
652
-
653
- if not self.is_simple and self.is_ablation:
654
- # Without weight
655
- seed_cell_value = np.zeros(self.cell_size)
656
- seed_cell_value[_seed_cell_index_] = 1
657
- seed_cell_matrix[:, i] = seed_cell_value / (1 if seed_cell_value.sum() == 0 else seed_cell_value.sum())
658
- seed_cell_en_value = np.zeros(self.cell_size)
659
- seed_cell_en_value[_seed_cell_en_index_] = 1
660
- seed_cell_matrix_en[:, i] = seed_cell_en_value / (1 if seed_cell_en_value.sum() == 0 else seed_cell_en_value.sum())
711
+ if not self.is_simple and self.is_ablation:
712
+ seed_cell_matrix[:, i] = res["seed_cell_matrix"]
713
+ seed_cell_matrix_en[:, i] = res["seed_cell_matrix_en"]
661
714
 
662
715
  return seed_cell_count, seed_cell_threshold, seed_cell_matrix, seed_cell_weight, seed_cell_index, seed_cell_matrix_en, seed_cell_weight_en
663
716
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sciv
3
- Version: 0.0.82
3
+ Version: 0.0.84
4
4
  Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
5
5
  Project-URL: github, https://github.com/YuZhengM/sciv
6
6
  Author-email: Zheng-Min Yu <yuzmbio@163.com>
@@ -27,13 +27,13 @@ sciv/preprocessing/_scanpy_.py,sha256=mmkk4cMCzJCziF49RnOuXBiF4frS6aSiwZdUmfDAg4
27
27
  sciv/preprocessing/_scvi_.py,sha256=ZIDkQ_4deYmzSMiAbu5C3j_jMMl7hBTFLCBXHCNj3B4,10332
28
28
  sciv/preprocessing/_snapatac_.py,sha256=Dq8CHF7Psl3CQszaEokQYO56Oe2uzyWOy_cGlaOywfc,27798
29
29
  sciv/tool/__init__.py,sha256=WXzHkWt6RgBC3qqD-98nR5wQmt6oC850ox_VpMrapSU,2468
30
- sciv/tool/_algorithm_.py,sha256=6xLGB1-FRfRiHSCVb_tHvzY_N-RoMZ79p0O2fEio688,48030
30
+ sciv/tool/_algorithm_.py,sha256=okGpH2OrBTO59LkyznT4gRi5S45oAcnO10Kxo5Xzy4I,47991
31
31
  sciv/tool/_matrix_.py,sha256=O1EAhA9wxh06P_eOxEBesK7kO7IExKlhH6uJzGh1HBM,24322
32
- sciv/tool/_random_walk_.py,sha256=98HLa9X2xx3Tj7VKKwQ2oS-CWL7HbOURAXiYKky2OYs,47338
32
+ sciv/tool/_random_walk_.py,sha256=XE7LlTFn5vHVth47cLtIJC6D4vwOsNM5sgrFwx-KL3g,48671
33
33
  sciv/util/__init__.py,sha256=nOxZ8if27X7AUJ6hZwTwxOJwIBJb0obWlHjqCzjg_Gc,1964
34
34
  sciv/util/_constant_.py,sha256=w0wKQd8guLd1ZTW24_5aECrWsIWDiNQmEpLsWlHar1A,3000
35
35
  sciv/util/_core_.py,sha256=ZD2uSnEBHVu0i9TmXWzri_3bXZzYKnIZk818gW3zadE,14751
36
- sciv-0.0.82.dist-info/METADATA,sha256=JTluLyVcmc6vYfTh76ejiHiT0fnqSEVJa9XngVEGj2U,3465
37
- sciv-0.0.82.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
38
- sciv-0.0.82.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
39
- sciv-0.0.82.dist-info/RECORD,,
36
+ sciv-0.0.84.dist-info/METADATA,sha256=XIG0HyGsbObIpMSR8_t0W9IZIAUy-shlHS07ziGAAGI,3465
37
+ sciv-0.0.84.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
38
+ sciv-0.0.84.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
39
+ sciv-0.0.84.dist-info/RECORD,,
File without changes