sciv 0.0.96__py3-none-any.whl → 0.0.98__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sciv/model/_core_.py +25 -13
- sciv/tool/_algorithm_.py +55 -18
- sciv/util/_core_.py +6 -4
- {sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/METADATA +1 -1
- {sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/RECORD +7 -7
- {sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/WHEEL +0 -0
- {sciv-0.0.96.dist-info → sciv-0.0.98.dist-info}/licenses/LICENSE +0 -0
sciv/model/_core_.py
CHANGED
|
@@ -15,7 +15,7 @@ from ..tool import RandomWalk, overlap_sum, obtain_cell_cell_network, calculate_
|
|
|
15
15
|
|
|
16
16
|
from ..file import save_h5ad, save_pkl, read_h5ad, read_pkl
|
|
17
17
|
from ..preprocessing import filter_data, poisson_vi
|
|
18
|
-
from ..util import path, enrichment_optional, to_dense, collection, to_sparse, difference_peak_optional
|
|
18
|
+
from ..util import path, enrichment_optional, to_dense, collection, to_sparse, difference_peak_optional, project_name
|
|
19
19
|
|
|
20
20
|
__name__: str = "model_core"
|
|
21
21
|
|
|
@@ -81,7 +81,7 @@ def core(
|
|
|
81
81
|
weight: float = 0.1,
|
|
82
82
|
kernel: Literal["laplacian", "gaussian"] = "gaussian",
|
|
83
83
|
local_k: int = 10,
|
|
84
|
-
kernel_gamma: Optional[float, collection] = None,
|
|
84
|
+
kernel_gamma: Optional[Union[float, collection]] = None,
|
|
85
85
|
epsilon: float = 1e-05,
|
|
86
86
|
gamma: float = 0.05,
|
|
87
87
|
enrichment_gamma: float = 0.05,
|
|
@@ -453,7 +453,7 @@ def core(
|
|
|
453
453
|
model_dir=model_dir
|
|
454
454
|
)
|
|
455
455
|
|
|
456
|
-
|
|
456
|
+
poisson_vi_time = adata.uns["elapsed_time"] + da_peaks.uns["elapsed_time"]
|
|
457
457
|
|
|
458
458
|
if save_path is not None:
|
|
459
459
|
|
|
@@ -481,11 +481,11 @@ def core(
|
|
|
481
481
|
)
|
|
482
482
|
|
|
483
483
|
else:
|
|
484
|
-
overlap_adata: AnnData = overlap_sum(adata, variants, trait_info)
|
|
484
|
+
overlap_adata: AnnData = overlap_sum(adata, variants, trait_info, n_jobs=n_jobs)
|
|
485
485
|
|
|
486
486
|
del variants, trait_info
|
|
487
487
|
|
|
488
|
-
|
|
488
|
+
overlap_time = overlap_adata.uns["elapsed_time"]
|
|
489
489
|
|
|
490
490
|
if save_path is not None and not overlap_is_read:
|
|
491
491
|
save_h5ad(overlap_adata, file=atac_overlap_save_file)
|
|
@@ -518,7 +518,7 @@ def core(
|
|
|
518
518
|
|
|
519
519
|
del da_peaks, overlap_adata
|
|
520
520
|
|
|
521
|
-
|
|
521
|
+
init_score_time = init_score.uns["elapsed_time"]
|
|
522
522
|
|
|
523
523
|
if save_path is not None and not init_score_is_read:
|
|
524
524
|
save_h5ad(init_score, file=init_score_save_file)
|
|
@@ -546,13 +546,13 @@ def core(
|
|
|
546
546
|
|
|
547
547
|
del adata
|
|
548
548
|
|
|
549
|
-
|
|
549
|
+
smknn_time = cc_data.uns["elapsed_time"]
|
|
550
550
|
|
|
551
551
|
if save_path is not None and not cc_data_is_read:
|
|
552
552
|
save_h5ad(cc_data, file=cc_data_save_file)
|
|
553
553
|
|
|
554
554
|
"""
|
|
555
|
-
5. Random walk
|
|
555
|
+
5. Random walk with weighted seed cells
|
|
556
556
|
"""
|
|
557
557
|
|
|
558
558
|
random_walk_is_read: bool = is_file_exist_loading and os.path.exists(random_walk_save_file) and is_save_random_walk_model
|
|
@@ -584,17 +584,29 @@ def core(
|
|
|
584
584
|
del random_walk_is_read, init_score, cc_data
|
|
585
585
|
|
|
586
586
|
trs = _run_random_walk_(random_walk, is_ablation, is_simple)
|
|
587
|
+
trs.uns["params"] = params
|
|
588
|
+
|
|
589
|
+
del params
|
|
587
590
|
|
|
588
|
-
|
|
591
|
+
random_walk_time = random_walk.elapsed_time
|
|
589
592
|
|
|
590
593
|
# end time
|
|
591
594
|
elapsed_time = time.time() - start_time
|
|
592
|
-
step_time =
|
|
595
|
+
step_time = poisson_vi_time + overlap_time + init_score_time + smknn_time + random_walk_time
|
|
593
596
|
|
|
594
|
-
|
|
595
|
-
|
|
597
|
+
if elapsed_time < step_time:
|
|
598
|
+
elapsed_time = step_time
|
|
596
599
|
|
|
597
|
-
|
|
600
|
+
ul.log(__name__).info(f"Algorithm {project_name} consumes a total of {elapsed_time} seconds.")
|
|
601
|
+
|
|
602
|
+
trs.uns["elapsed_time"] = {
|
|
603
|
+
"PoissonVI": poisson_vi_time,
|
|
604
|
+
"Overlap": overlap_time,
|
|
605
|
+
"initial TRS": init_score_time,
|
|
606
|
+
"SM-kNN": smknn_time,
|
|
607
|
+
"Random walk": random_walk_time,
|
|
608
|
+
"Total time": elapsed_time
|
|
609
|
+
}
|
|
598
610
|
|
|
599
611
|
if save_path is not None:
|
|
600
612
|
save_h5ad(trs, file=trs_save_file)
|
sciv/tool/_algorithm_.py
CHANGED
|
@@ -7,6 +7,8 @@ from typing import Union, Tuple, Literal, Optional
|
|
|
7
7
|
from scipy import sparse
|
|
8
8
|
from scipy.stats import norm
|
|
9
9
|
from tqdm import tqdm
|
|
10
|
+
from joblib import Parallel, delayed
|
|
11
|
+
import multiprocessing
|
|
10
12
|
|
|
11
13
|
import numpy as np
|
|
12
14
|
from anndata import AnnData
|
|
@@ -516,7 +518,7 @@ def semi_mutual_knn_weight(
|
|
|
516
518
|
adj_weight = (1 - weight) * adj_and.astype(np.float32) + weight * adj_or.astype(np.float32)
|
|
517
519
|
|
|
518
520
|
# Ensure full connectivity if required
|
|
519
|
-
if is_mknn_fully_connected
|
|
521
|
+
if is_mknn_fully_connected:
|
|
520
522
|
adj_1nn = _knn(new_data, 1)
|
|
521
523
|
|
|
522
524
|
if sparse.issparse(adj_and):
|
|
@@ -824,16 +826,17 @@ def _overlap_(regions_sort: DataFrame, variants: DataFrame) -> DataFrame:
|
|
|
824
826
|
if chr_a in chr_keys:
|
|
825
827
|
# get chr variant
|
|
826
828
|
variants_chr_type_position_list = variants_position_list[chr_a]
|
|
829
|
+
|
|
827
830
|
# judge start and end position
|
|
828
831
|
if start <= variants_chr_type_position_list[-1] and end >= variants_chr_type_position_list[0]:
|
|
829
832
|
# get index
|
|
830
|
-
start_index = get_index(start, variants_chr_type_position_list)
|
|
831
|
-
end_index = get_index(end, variants_chr_type_position_list)
|
|
833
|
+
start_index = get_index(start, variants_chr_type_position_list, False)
|
|
834
|
+
end_index = get_index(end, variants_chr_type_position_list, False)
|
|
832
835
|
|
|
833
836
|
# Determine whether it is equal, Equality means there is no overlap
|
|
834
837
|
if start_index != end_index:
|
|
835
|
-
start_index = start_index if isinstance(start_index,
|
|
836
|
-
end_index = end_index + 1 if isinstance(end_index,
|
|
838
|
+
start_index = start_index if isinstance(start_index, int) else start_index[1]
|
|
839
|
+
end_index = end_index + 1 if isinstance(end_index, int) else end_index[1]
|
|
837
840
|
|
|
838
841
|
if start_index > end_index:
|
|
839
842
|
ul.log(__name__).error("The end index in the region is greater than the start index.")
|
|
@@ -894,13 +897,14 @@ def overlap(regions: DataFrame, variants: DataFrame) -> DataFrame:
|
|
|
894
897
|
return _overlap_(regions_sort, variants)
|
|
895
898
|
|
|
896
899
|
|
|
897
|
-
def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnData:
|
|
900
|
+
def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame, n_jobs: int = -1) -> AnnData:
|
|
898
901
|
"""
|
|
899
902
|
Overlap regional data and mutation data and sum the PP values of all mutations in a region as the values for that
|
|
900
903
|
region.
|
|
901
904
|
:param regions: peaks data
|
|
902
905
|
:param variants: variants data
|
|
903
906
|
:param trait_info: traits information
|
|
907
|
+
:param n_jobs: The maximum number of concurrently running jobs
|
|
904
908
|
:return: overlap data
|
|
905
909
|
"""
|
|
906
910
|
|
|
@@ -916,8 +920,6 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
|
|
|
916
920
|
|
|
917
921
|
trait_names = trait_info["id"].tolist()
|
|
918
922
|
n_trait = len(trait_names)
|
|
919
|
-
# Pre-allocate sparse matrix, fill column by column, then convert to csc and then csr for efficiency
|
|
920
|
-
row_indices, col_indices, data_vals = [], [], []
|
|
921
923
|
|
|
922
924
|
# Check column existence once
|
|
923
925
|
required = {"chr", "start", "end"}
|
|
@@ -941,13 +943,18 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
|
|
|
941
943
|
|
|
942
944
|
ul.log(__name__).info("Obtain peak-trait/disease matrix. (overlap variant information)")
|
|
943
945
|
|
|
944
|
-
#
|
|
945
|
-
|
|
946
|
+
# Function to process a single trait
|
|
947
|
+
def _process_trait_(trait_name, col_idx):
|
|
948
|
+
|
|
949
|
+
local_data_vals = []
|
|
950
|
+
local_row_indices = []
|
|
951
|
+
local_col_indices = []
|
|
952
|
+
|
|
946
953
|
variant: AnnData = variants[trait_name]
|
|
947
954
|
overlap_df: DataFrame = _overlap_(regions_df, variant.obs)
|
|
948
955
|
|
|
949
956
|
if overlap_df.empty:
|
|
950
|
-
|
|
957
|
+
return local_data_vals, local_row_indices, local_col_indices
|
|
951
958
|
|
|
952
959
|
# Sum at once: first group by label and collect variant_id into a list
|
|
953
960
|
label_var_ids = (
|
|
@@ -972,15 +979,37 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
|
|
|
972
979
|
if matrix_sum.size == 1:
|
|
973
980
|
val = float(matrix_sum)
|
|
974
981
|
if val != 0:
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
982
|
+
local_row_indices.append(row_idx)
|
|
983
|
+
local_col_indices.append(col_idx)
|
|
984
|
+
local_data_vals.append(val)
|
|
978
985
|
else:
|
|
979
986
|
for t_idx, v in enumerate(matrix_sum):
|
|
980
987
|
if v != 0:
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
988
|
+
local_row_indices.append(row_idx)
|
|
989
|
+
local_col_indices.append(col_idx + t_idx)
|
|
990
|
+
local_data_vals.append(float(v))
|
|
991
|
+
|
|
992
|
+
return local_data_vals, local_row_indices, local_col_indices
|
|
993
|
+
|
|
994
|
+
# Use Parallel to process traits in parallel
|
|
995
|
+
results = Parallel(n_jobs=n_jobs)(
|
|
996
|
+
delayed(_process_trait_)(trait_name, col_idx) for col_idx, trait_name in tqdm(enumerate(trait_names))
|
|
997
|
+
)
|
|
998
|
+
|
|
999
|
+
# Preallocate length to avoid list dynamic expansion
|
|
1000
|
+
total = sum(len(ld) for ld, _, _ in results)
|
|
1001
|
+
row_indices = np.empty(total, dtype=np.int32)
|
|
1002
|
+
col_indices = np.empty(total, dtype=np.int32)
|
|
1003
|
+
data_vals = np.empty(total, dtype=np.float32)
|
|
1004
|
+
|
|
1005
|
+
ptr = 0
|
|
1006
|
+
|
|
1007
|
+
for local_data, local_rows, local_cols in results:
|
|
1008
|
+
n = len(local_data)
|
|
1009
|
+
row_indices[ptr:ptr+n] = local_rows
|
|
1010
|
+
col_indices[ptr:ptr+n] = local_cols
|
|
1011
|
+
data_vals[ptr:ptr+n] = local_data
|
|
1012
|
+
ptr += n
|
|
984
1013
|
|
|
985
1014
|
# Build sparse matrix, then convert to csr format
|
|
986
1015
|
overlap_sparse = sparse.csc_matrix(
|
|
@@ -1205,10 +1234,18 @@ def calculate_init_score_weight(
|
|
|
1205
1234
|
|
|
1206
1235
|
ul.log(__name__).info("Calculate initial trait relevance scores")
|
|
1207
1236
|
_init_trs_weight_ = np.multiply(_init_trs_ncw_, _cell_type_weight_)
|
|
1237
|
+
|
|
1238
|
+
if hasattr(_init_trs_weight_, "A"):
|
|
1239
|
+
_init_trs_weight_ = _init_trs_weight_.A
|
|
1240
|
+
|
|
1208
1241
|
init_trs_adata = AnnData(_init_trs_weight_, obs=cell_anno, var=trait_anno)
|
|
1209
1242
|
del _init_trs_weight_
|
|
1210
1243
|
|
|
1211
1244
|
if not is_simple:
|
|
1245
|
+
|
|
1246
|
+
if hasattr(_init_trs_ncw_, "A"):
|
|
1247
|
+
_init_trs_ncw_ = _init_trs_ncw_.A
|
|
1248
|
+
|
|
1212
1249
|
init_trs_adata.layers["init_trs_ncw"] = _init_trs_ncw_
|
|
1213
1250
|
init_trs_adata.layers["cell_type_weight"] = to_sparse(_cell_type_weight_)
|
|
1214
1251
|
init_trs_adata.uns["cluster_weight_factor"] = da_peaks_adata.obsm["cluster_weight"]
|
|
@@ -1253,7 +1290,7 @@ def obtain_cell_cell_network(
|
|
|
1253
1290
|
weight: float = 0.1,
|
|
1254
1291
|
kernel: Literal["laplacian", "gaussian"] = "gaussian",
|
|
1255
1292
|
local_k: int = 10,
|
|
1256
|
-
gamma: Optional[float, collection] = None,
|
|
1293
|
+
gamma: Optional[Union[float, collection]] = None,
|
|
1257
1294
|
is_simple: bool = True
|
|
1258
1295
|
) -> AnnData:
|
|
1259
1296
|
"""
|
sciv/util/_core_.py
CHANGED
|
@@ -90,17 +90,20 @@ def sum_min_max(data: matrix_data, axis: int = 1) -> Tuple[number, number]:
|
|
|
90
90
|
return min(rows_sum), max(rows_sum)
|
|
91
91
|
|
|
92
92
|
|
|
93
|
-
def get_index(position: number, positions_list: list) -> Union[
|
|
93
|
+
def get_index(position: number, positions_list: list, is_sort: bool = True) -> Union[int, Tuple[int, int]]:
|
|
94
94
|
"""
|
|
95
95
|
Search for position information. Similar to half search.
|
|
96
96
|
If the position exists in the list, return the index.
|
|
97
97
|
If it does not exist, return the index located between the two indexes
|
|
98
98
|
:param position: position
|
|
99
99
|
:param positions_list: position list
|
|
100
|
+
:param is_sort: True
|
|
100
101
|
:return: position index
|
|
101
102
|
"""
|
|
102
|
-
|
|
103
|
-
|
|
103
|
+
|
|
104
|
+
if is_sort:
|
|
105
|
+
positions_list.sort()
|
|
106
|
+
|
|
104
107
|
# search
|
|
105
108
|
position_size: int = len(positions_list)
|
|
106
109
|
left, right = 0, position_size - 1
|
|
@@ -441,7 +444,6 @@ def plot_end(
|
|
|
441
444
|
close: bool = False,
|
|
442
445
|
dpi: float = 300
|
|
443
446
|
):
|
|
444
|
-
|
|
445
447
|
if title is not None:
|
|
446
448
|
plt.title(title)
|
|
447
449
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sciv
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.98
|
|
4
4
|
Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
|
|
5
5
|
Project-URL: github, https://github.com/YuZhengM/sciv
|
|
6
6
|
Author-email: Zheng-Min Yu <yuzmbio@163.com>
|
|
@@ -3,7 +3,7 @@ sciv/file/__init__.py,sha256=8cYLG0S0nilblmyX46CWFrbLr-rmLbO1EEO477pZ-gk,520
|
|
|
3
3
|
sciv/file/_read_.py,sha256=UZJpN3_5hBiTjzEYO6YXORcE_dqA8HmLpV80nqTLNSo,30554
|
|
4
4
|
sciv/file/_write_.py,sha256=W3M9CmPi7BuKAffz1fdi-vA5DzAFZ7wmcggp33N9Xtg,7848
|
|
5
5
|
sciv/model/__init__.py,sha256=k8SO9FpJaGn2ANqJyaz3HXMas7jH9toPVtpw703kOqg,149
|
|
6
|
-
sciv/model/_core_.py,sha256=
|
|
6
|
+
sciv/model/_core_.py,sha256=HCXPFOxfed5TUJMI-pZV9cx9_2R8z4dNPb2f7Hp3mVc,33693
|
|
7
7
|
sciv/plot/__init__.py,sha256=2tRNT6TZNz9r38lnna712RGsH7OJ2QkGa37XKgzejHQ,1865
|
|
8
8
|
sciv/plot/_bar_.py,sha256=xWpFbJTHgQMLuUSXa4uE69RGHXSCGinapxL-1imdDcU,14355
|
|
9
9
|
sciv/plot/_barcode_.py,sha256=RDOedQ8ZtXWFyJ2c772RDfqO4TMIpHMvcMZMAVqky90,5073
|
|
@@ -27,13 +27,13 @@ sciv/preprocessing/_scanpy_.py,sha256=tB8BD2wpLAU8_YxdqrgNtcjpNXNRo-JCdm2lxaKDBL
|
|
|
27
27
|
sciv/preprocessing/_scvi_.py,sha256=7QxwPA2kR_g15X28aEak7AFA4kyQ-UbtpiLH-rc5Ksg,10780
|
|
28
28
|
sciv/preprocessing/_snapatac_.py,sha256=Dq8CHF7Psl3CQszaEokQYO56Oe2uzyWOy_cGlaOywfc,27798
|
|
29
29
|
sciv/tool/__init__.py,sha256=WXzHkWt6RgBC3qqD-98nR5wQmt6oC850ox_VpMrapSU,2468
|
|
30
|
-
sciv/tool/_algorithm_.py,sha256=
|
|
30
|
+
sciv/tool/_algorithm_.py,sha256=mYKfSuYGelLd2secwyqGPxBQYd3x2yDKw1z7HK8mqYE,53773
|
|
31
31
|
sciv/tool/_matrix_.py,sha256=SnC3sXic_ufuEXStcD_HncvYH6apBdNK6nhG6jFLmjA,24324
|
|
32
32
|
sciv/tool/_random_walk_.py,sha256=JOB97XLxlZYHvlIST1wlXgA0mw6fybkWnJGq6X_kbsk,48871
|
|
33
33
|
sciv/util/__init__.py,sha256=nOxZ8if27X7AUJ6hZwTwxOJwIBJb0obWlHjqCzjg_Gc,1964
|
|
34
34
|
sciv/util/_constant_.py,sha256=w0wKQd8guLd1ZTW24_5aECrWsIWDiNQmEpLsWlHar1A,3000
|
|
35
|
-
sciv/util/_core_.py,sha256=
|
|
36
|
-
sciv-0.0.
|
|
37
|
-
sciv-0.0.
|
|
38
|
-
sciv-0.0.
|
|
39
|
-
sciv-0.0.
|
|
35
|
+
sciv/util/_core_.py,sha256=TUWfBNRJzWuoQ9ffew_DjnlkNydG-Rmujl_RH4Ln9io,14917
|
|
36
|
+
sciv-0.0.98.dist-info/METADATA,sha256=yZiAx2el-OqapNmZNjUpjAmTAmQbIcBIZa7QFXjYPYc,3465
|
|
37
|
+
sciv-0.0.98.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
38
|
+
sciv-0.0.98.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
|
|
39
|
+
sciv-0.0.98.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|