sciv 0.0.82__py3-none-any.whl → 0.0.84__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sciv/tool/_algorithm_.py +0 -1
- sciv/tool/_random_walk_.py +116 -63
- {sciv-0.0.82.dist-info → sciv-0.0.84.dist-info}/METADATA +1 -1
- {sciv-0.0.82.dist-info → sciv-0.0.84.dist-info}/RECORD +6 -6
- {sciv-0.0.82.dist-info → sciv-0.0.84.dist-info}/WHEEL +0 -0
- {sciv-0.0.82.dist-info → sciv-0.0.84.dist-info}/licenses/LICENSE +0 -0
sciv/tool/_algorithm_.py
CHANGED
sciv/tool/_random_walk_.py
CHANGED
|
@@ -8,6 +8,7 @@ import torch.nn as nn
|
|
|
8
8
|
|
|
9
9
|
from torch import Tensor
|
|
10
10
|
from tqdm import tqdm
|
|
11
|
+
from joblib import Parallel, delayed
|
|
11
12
|
|
|
12
13
|
import numpy as np
|
|
13
14
|
from anndata import AnnData
|
|
@@ -23,14 +24,15 @@ from ..util import (
|
|
|
23
24
|
collection,
|
|
24
25
|
check_adata_get,
|
|
25
26
|
enrichment_optional,
|
|
26
|
-
check_gpu_availability
|
|
27
|
+
check_gpu_availability,
|
|
28
|
+
sparse_data
|
|
27
29
|
)
|
|
28
30
|
|
|
29
31
|
__name__: str = "tool_random_walk"
|
|
30
32
|
|
|
31
33
|
|
|
32
34
|
def _random_walk_cpu_(
|
|
33
|
-
seed_cell_vector:
|
|
35
|
+
seed_cell_vector: Union[list, np.ndarray, np.matrix],
|
|
34
36
|
weight: matrix_data = None,
|
|
35
37
|
gamma: float = 0.05,
|
|
36
38
|
epsilon: float = 1e-5,
|
|
@@ -46,17 +48,19 @@ def _random_walk_cpu_(
|
|
|
46
48
|
:return: The value after random walk.
|
|
47
49
|
"""
|
|
48
50
|
|
|
49
|
-
w = to_dense(weight)
|
|
50
|
-
|
|
51
51
|
# Random walk
|
|
52
|
-
p0 = seed_cell_vector.
|
|
52
|
+
p0 = np.asarray(seed_cell_vector, dtype=float).ravel()[:, np.newaxis]
|
|
53
53
|
pt: matrix_data = p0.copy()
|
|
54
54
|
k = 0
|
|
55
55
|
delta = 1
|
|
56
56
|
|
|
57
57
|
# iteration
|
|
58
58
|
while delta > epsilon:
|
|
59
|
-
|
|
59
|
+
|
|
60
|
+
if hasattr(weight, "dot"):
|
|
61
|
+
p1 = (1 - gamma) * weight.dot(pt) + gamma * p0
|
|
62
|
+
else:
|
|
63
|
+
p1 = (1 - gamma) * np.dot(weight, pt) + gamma * p0
|
|
60
64
|
|
|
61
65
|
# 1 and 2, It would be faster alone
|
|
62
66
|
if p == 1:
|
|
@@ -297,7 +301,6 @@ class RandomWalk:
|
|
|
297
301
|
|
|
298
302
|
init_status.obs["clusters"] = init_status.obs["clusters"].astype(str)
|
|
299
303
|
|
|
300
|
-
self.cc_adata = cc_adata
|
|
301
304
|
self.epsilon = epsilon
|
|
302
305
|
self.gamma = gamma
|
|
303
306
|
self.enrichment_gamma = enrichment_gamma
|
|
@@ -390,10 +393,12 @@ class RandomWalk:
|
|
|
390
393
|
self.random_seed_cell = np.zeros(init_status.shape)
|
|
391
394
|
|
|
392
395
|
# Transition Probability Matrix
|
|
393
|
-
self.weight = self._get_weight_(
|
|
396
|
+
self.weight = self._get_weight_(cc_adata.X)
|
|
394
397
|
|
|
395
398
|
if not is_simple and self.is_ablation:
|
|
396
|
-
self.weight_m_knn = self._get_weight_(
|
|
399
|
+
self.weight_m_knn = self._get_weight_(cc_adata.layers["cell_mutual_knn"])
|
|
400
|
+
|
|
401
|
+
del cc_adata
|
|
397
402
|
|
|
398
403
|
self.cluster_types, self.init_seed_cell_size = self._get_cluster_info_()
|
|
399
404
|
|
|
@@ -419,6 +424,9 @@ class RandomWalk:
|
|
|
419
424
|
self.seed_cell_weight_en_ncw
|
|
420
425
|
) = self._get_seed_cell_(init_data=init_status_no_weight, info="ablation")
|
|
421
426
|
|
|
427
|
+
del self.cell_affinity
|
|
428
|
+
del init_status
|
|
429
|
+
|
|
422
430
|
def _random_walk_(
|
|
423
431
|
self,
|
|
424
432
|
seed_cell_data: matrix_data,
|
|
@@ -461,7 +469,7 @@ class RandomWalk:
|
|
|
461
469
|
return self._random_walk_(seed_cell_data, weight, self.gamma)
|
|
462
470
|
|
|
463
471
|
@staticmethod
|
|
464
|
-
def _get_weight_(cell_cell_matrix: matrix_data) ->
|
|
472
|
+
def _get_weight_(cell_cell_matrix: matrix_data) -> sparse_data:
|
|
465
473
|
"""
|
|
466
474
|
Obtain weights in random walk
|
|
467
475
|
:param cell_cell_matrix: Cell to cell connectivity matrix
|
|
@@ -472,7 +480,7 @@ class RandomWalk:
|
|
|
472
480
|
data_weight = to_dense(cell_cell_matrix, is_array=True)
|
|
473
481
|
cell_sum_weight = data_weight.sum(axis=1)[:, np.newaxis]
|
|
474
482
|
cell_sum_weight[cell_sum_weight == 0] = 1
|
|
475
|
-
return data_weight / cell_sum_weight
|
|
483
|
+
return to_sparse(data_weight / cell_sum_weight)
|
|
476
484
|
|
|
477
485
|
def _get_cell_weight_(self, seed_cell_size: int) -> matrix_data:
|
|
478
486
|
_cell_cell_knn_: matrix_data = self.cell_affinity.copy()
|
|
@@ -592,72 +600,117 @@ class RandomWalk:
|
|
|
592
600
|
if init_data is None:
|
|
593
601
|
init_data = self.init_status
|
|
594
602
|
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
603
|
+
n_traits = len(self.trait_list)
|
|
604
|
+
n_cells = self.cell_size
|
|
605
|
+
|
|
606
|
+
seed_cell_count = np.zeros(n_traits, dtype=int)
|
|
607
|
+
seed_cell_threshold = np.zeros(n_traits)
|
|
608
|
+
seed_cell_weight = np.zeros((n_cells, n_traits))
|
|
609
|
+
seed_cell_index = np.zeros((n_cells, n_traits), dtype=int)
|
|
610
|
+
seed_cell_weight_en = np.zeros((n_cells, n_traits))
|
|
601
611
|
|
|
602
612
|
if not self.is_simple:
|
|
603
|
-
seed_cell_matrix
|
|
604
|
-
seed_cell_matrix_en
|
|
613
|
+
seed_cell_matrix = np.zeros((n_cells, n_traits))
|
|
614
|
+
seed_cell_matrix_en = np.zeros((n_cells, n_traits))
|
|
605
615
|
else:
|
|
606
|
-
seed_cell_matrix
|
|
607
|
-
seed_cell_matrix_en
|
|
616
|
+
seed_cell_matrix = np.zeros((1, 1))
|
|
617
|
+
seed_cell_matrix_en = np.zeros((1, 1))
|
|
618
|
+
|
|
619
|
+
ul.log(__name__).info(f"Calculate {n_traits} traits/diseases for seed cells information.{f' ({info})' if info else ''}")
|
|
620
|
+
|
|
621
|
+
trait_values_all = to_dense(init_data.X, is_array=True)
|
|
622
|
+
|
|
623
|
+
def _process_single_trait(i: int) -> dict:
|
|
624
|
+
trait_value = trait_values_all[:, i]
|
|
625
|
+
trait_value_max = trait_value.max()
|
|
626
|
+
trait_value_min = trait_value.min()
|
|
627
|
+
|
|
628
|
+
if trait_value_min == trait_value_max:
|
|
629
|
+
return dict(
|
|
630
|
+
seed_cell_count=0,
|
|
631
|
+
seed_cell_threshold=0.0,
|
|
632
|
+
seed_cell_index=None,
|
|
633
|
+
seed_cell_weight=None,
|
|
634
|
+
seed_cell_en_index=None,
|
|
635
|
+
seed_cell_en_weight=None,
|
|
636
|
+
seed_cell_matrix=None,
|
|
637
|
+
seed_cell_matrix_en=None
|
|
638
|
+
)
|
|
608
639
|
|
|
609
|
-
|
|
610
|
-
|
|
640
|
+
# 直接获取降序索引
|
|
641
|
+
trait_value_sort_index = np.argpartition(trait_value, -trait_value.size)[::-1]
|
|
642
|
+
|
|
643
|
+
# 计算 >0 的细胞数
|
|
644
|
+
_gt0_cell_size = (trait_value > 0).sum()
|
|
611
645
|
|
|
612
|
-
|
|
613
|
-
trait_adata: AnnData = init_data[:, i]
|
|
614
|
-
trait_value: collection = to_dense(trait_adata.X, is_array=True).flatten()
|
|
646
|
+
_seed_cell_size = self._get_seed_cell_size_(_gt0_cell_size)
|
|
615
647
|
|
|
616
|
-
#
|
|
617
|
-
|
|
618
|
-
|
|
648
|
+
# 设置种子细胞索引与权重
|
|
649
|
+
_seed_cell_index = trait_value_sort_index[:_seed_cell_size]
|
|
650
|
+
_seed_cell_weight = np.zeros(n_cells)
|
|
651
|
+
_seed_cell_weight[_seed_cell_index] = self._get_seed_cell_weight_(
|
|
652
|
+
seed_cell_index=_seed_cell_index, value=trait_value
|
|
653
|
+
)
|
|
619
654
|
|
|
620
|
-
|
|
655
|
+
# 富集区间索引
|
|
656
|
+
_enrichment_start = _seed_cell_size
|
|
657
|
+
_enrichment_end = min(2 * _seed_cell_size, self.cell_size - 1)
|
|
621
658
|
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
_gto_cell_index_ = trait_value > 0
|
|
626
|
-
_gt0_cell_size_ = trait_value[_gto_cell_index_].size
|
|
659
|
+
if _gt0_cell_size == _seed_cell_size:
|
|
660
|
+
_enrichment_start = max(_seed_cell_size - self._enrichment_seed_cell_min_count_, 0)
|
|
661
|
+
_enrichment_end = _seed_cell_size
|
|
627
662
|
|
|
628
|
-
|
|
663
|
+
_seed_cell_en_index = trait_value_sort_index[_enrichment_start:_enrichment_end]
|
|
664
|
+
_seed_cell_en_weight = np.zeros(n_cells)
|
|
665
|
+
_tmp_weight = self._get_seed_cell_weight_(
|
|
666
|
+
seed_cell_index=_seed_cell_index if len(_seed_cell_en_index) == len(_seed_cell_index) else _seed_cell_en_index,
|
|
667
|
+
value=trait_value,
|
|
668
|
+
seed_cell_index_enrichment=_seed_cell_en_index
|
|
669
|
+
)
|
|
670
|
+
_seed_cell_en_weight[_seed_cell_en_index] = _tmp_weight
|
|
671
|
+
|
|
672
|
+
# 无权重版本(仅在需要时计算)
|
|
673
|
+
_seed_cell_matrix = None
|
|
674
|
+
_seed_cell_matrix_en = None
|
|
675
|
+
|
|
676
|
+
if not self.is_simple and self.is_ablation:
|
|
677
|
+
seed_cell_value = np.zeros(n_cells)
|
|
678
|
+
seed_cell_value[_seed_cell_index] = 1
|
|
679
|
+
_seed_cell_matrix = seed_cell_value / seed_cell_value.sum() if seed_cell_value.sum() else 0
|
|
680
|
+
|
|
681
|
+
seed_cell_en_value = np.zeros(n_cells)
|
|
682
|
+
seed_cell_en_value[_seed_cell_en_index] = 1
|
|
683
|
+
_seed_cell_matrix_en = seed_cell_en_value / seed_cell_en_value.sum() if seed_cell_en_value.sum() else 0
|
|
684
|
+
|
|
685
|
+
return dict(
|
|
686
|
+
seed_cell_count=_seed_cell_size,
|
|
687
|
+
seed_cell_threshold=trait_value[trait_value_sort_index[_seed_cell_size]],
|
|
688
|
+
seed_cell_index=_seed_cell_index,
|
|
689
|
+
seed_cell_weight=_seed_cell_weight,
|
|
690
|
+
seed_cell_en_index=_seed_cell_en_index,
|
|
691
|
+
seed_cell_en_weight=_seed_cell_en_weight,
|
|
692
|
+
seed_cell_matrix=_seed_cell_matrix,
|
|
693
|
+
seed_cell_matrix_en=_seed_cell_matrix_en
|
|
694
|
+
)
|
|
629
695
|
|
|
630
|
-
|
|
631
|
-
|
|
696
|
+
# 并行处理所有 trait
|
|
697
|
+
results = Parallel(n_jobs=-1, backend="threading")(delayed(_process_single_trait)(i) for i in self.trait_range)
|
|
632
698
|
|
|
633
|
-
|
|
634
|
-
|
|
635
|
-
seed_cell_index[:, i][_seed_cell_index_] = 1
|
|
636
|
-
seed_cell_weight[:, i][_seed_cell_index_] = self._get_seed_cell_weight_(seed_cell_index=_seed_cell_index_, value=trait_value)
|
|
699
|
+
# 将并行结果写回对应数组
|
|
700
|
+
for i, res in enumerate(results):
|
|
637
701
|
|
|
638
|
-
|
|
639
|
-
|
|
702
|
+
if res["seed_cell_index"] is None:
|
|
703
|
+
continue
|
|
640
704
|
|
|
641
|
-
|
|
642
|
-
|
|
643
|
-
|
|
644
|
-
|
|
705
|
+
seed_cell_count[i] = res["seed_cell_count"]
|
|
706
|
+
seed_cell_threshold[i] = res["seed_cell_threshold"]
|
|
707
|
+
seed_cell_index[res["seed_cell_index"], i] = 1
|
|
708
|
+
seed_cell_weight[:, i] = res["seed_cell_weight"]
|
|
709
|
+
seed_cell_weight_en[res["seed_cell_en_index"], i] = res["seed_cell_en_weight"]
|
|
645
710
|
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
seed_cell_index_enrichment=_seed_cell_en_index_
|
|
650
|
-
)
|
|
651
|
-
seed_cell_weight_en[:, i][_seed_cell_en_index_] = _seed_cell_en_weight_
|
|
652
|
-
|
|
653
|
-
if not self.is_simple and self.is_ablation:
|
|
654
|
-
# Without weight
|
|
655
|
-
seed_cell_value = np.zeros(self.cell_size)
|
|
656
|
-
seed_cell_value[_seed_cell_index_] = 1
|
|
657
|
-
seed_cell_matrix[:, i] = seed_cell_value / (1 if seed_cell_value.sum() == 0 else seed_cell_value.sum())
|
|
658
|
-
seed_cell_en_value = np.zeros(self.cell_size)
|
|
659
|
-
seed_cell_en_value[_seed_cell_en_index_] = 1
|
|
660
|
-
seed_cell_matrix_en[:, i] = seed_cell_en_value / (1 if seed_cell_en_value.sum() == 0 else seed_cell_en_value.sum())
|
|
711
|
+
if not self.is_simple and self.is_ablation:
|
|
712
|
+
seed_cell_matrix[:, i] = res["seed_cell_matrix"]
|
|
713
|
+
seed_cell_matrix_en[:, i] = res["seed_cell_matrix_en"]
|
|
661
714
|
|
|
662
715
|
return seed_cell_count, seed_cell_threshold, seed_cell_matrix, seed_cell_weight, seed_cell_index, seed_cell_matrix_en, seed_cell_weight_en
|
|
663
716
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sciv
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.84
|
|
4
4
|
Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
|
|
5
5
|
Project-URL: github, https://github.com/YuZhengM/sciv
|
|
6
6
|
Author-email: Zheng-Min Yu <yuzmbio@163.com>
|
|
@@ -27,13 +27,13 @@ sciv/preprocessing/_scanpy_.py,sha256=mmkk4cMCzJCziF49RnOuXBiF4frS6aSiwZdUmfDAg4
|
|
|
27
27
|
sciv/preprocessing/_scvi_.py,sha256=ZIDkQ_4deYmzSMiAbu5C3j_jMMl7hBTFLCBXHCNj3B4,10332
|
|
28
28
|
sciv/preprocessing/_snapatac_.py,sha256=Dq8CHF7Psl3CQszaEokQYO56Oe2uzyWOy_cGlaOywfc,27798
|
|
29
29
|
sciv/tool/__init__.py,sha256=WXzHkWt6RgBC3qqD-98nR5wQmt6oC850ox_VpMrapSU,2468
|
|
30
|
-
sciv/tool/_algorithm_.py,sha256=
|
|
30
|
+
sciv/tool/_algorithm_.py,sha256=okGpH2OrBTO59LkyznT4gRi5S45oAcnO10Kxo5Xzy4I,47991
|
|
31
31
|
sciv/tool/_matrix_.py,sha256=O1EAhA9wxh06P_eOxEBesK7kO7IExKlhH6uJzGh1HBM,24322
|
|
32
|
-
sciv/tool/_random_walk_.py,sha256=
|
|
32
|
+
sciv/tool/_random_walk_.py,sha256=XE7LlTFn5vHVth47cLtIJC6D4vwOsNM5sgrFwx-KL3g,48671
|
|
33
33
|
sciv/util/__init__.py,sha256=nOxZ8if27X7AUJ6hZwTwxOJwIBJb0obWlHjqCzjg_Gc,1964
|
|
34
34
|
sciv/util/_constant_.py,sha256=w0wKQd8guLd1ZTW24_5aECrWsIWDiNQmEpLsWlHar1A,3000
|
|
35
35
|
sciv/util/_core_.py,sha256=ZD2uSnEBHVu0i9TmXWzri_3bXZzYKnIZk818gW3zadE,14751
|
|
36
|
-
sciv-0.0.
|
|
37
|
-
sciv-0.0.
|
|
38
|
-
sciv-0.0.
|
|
39
|
-
sciv-0.0.
|
|
36
|
+
sciv-0.0.84.dist-info/METADATA,sha256=XIG0HyGsbObIpMSR8_t0W9IZIAUy-shlHS07ziGAAGI,3465
|
|
37
|
+
sciv-0.0.84.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
38
|
+
sciv-0.0.84.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
|
|
39
|
+
sciv-0.0.84.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|