sciv 0.0.96__py3-none-any.whl → 0.0.97__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sciv/model/_core_.py +2 -2
- sciv/tool/_algorithm_.py +47 -18
- sciv/util/_core_.py +6 -4
- {sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/METADATA +1 -1
- {sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/RECORD +7 -7
- {sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/WHEEL +0 -0
- {sciv-0.0.96.dist-info → sciv-0.0.97.dist-info}/licenses/LICENSE +0 -0
sciv/model/_core_.py
CHANGED
|
@@ -81,7 +81,7 @@ def core(
|
|
|
81
81
|
weight: float = 0.1,
|
|
82
82
|
kernel: Literal["laplacian", "gaussian"] = "gaussian",
|
|
83
83
|
local_k: int = 10,
|
|
84
|
-
kernel_gamma: Optional[float, collection] = None,
|
|
84
|
+
kernel_gamma: Optional[Union[float, collection]] = None,
|
|
85
85
|
epsilon: float = 1e-05,
|
|
86
86
|
gamma: float = 0.05,
|
|
87
87
|
enrichment_gamma: float = 0.05,
|
|
@@ -481,7 +481,7 @@ def core(
|
|
|
481
481
|
)
|
|
482
482
|
|
|
483
483
|
else:
|
|
484
|
-
overlap_adata: AnnData = overlap_sum(adata, variants, trait_info)
|
|
484
|
+
overlap_adata: AnnData = overlap_sum(adata, variants, trait_info, n_jobs=n_jobs)
|
|
485
485
|
|
|
486
486
|
del variants, trait_info
|
|
487
487
|
|
sciv/tool/_algorithm_.py
CHANGED
|
@@ -7,6 +7,8 @@ from typing import Union, Tuple, Literal, Optional
|
|
|
7
7
|
from scipy import sparse
|
|
8
8
|
from scipy.stats import norm
|
|
9
9
|
from tqdm import tqdm
|
|
10
|
+
from joblib import Parallel, delayed
|
|
11
|
+
import multiprocessing
|
|
10
12
|
|
|
11
13
|
import numpy as np
|
|
12
14
|
from anndata import AnnData
|
|
@@ -516,7 +518,7 @@ def semi_mutual_knn_weight(
|
|
|
516
518
|
adj_weight = (1 - weight) * adj_and.astype(np.float32) + weight * adj_or.astype(np.float32)
|
|
517
519
|
|
|
518
520
|
# Ensure full connectivity if required
|
|
519
|
-
if is_mknn_fully_connected
|
|
521
|
+
if is_mknn_fully_connected:
|
|
520
522
|
adj_1nn = _knn(new_data, 1)
|
|
521
523
|
|
|
522
524
|
if sparse.issparse(adj_and):
|
|
@@ -824,16 +826,17 @@ def _overlap_(regions_sort: DataFrame, variants: DataFrame) -> DataFrame:
|
|
|
824
826
|
if chr_a in chr_keys:
|
|
825
827
|
# get chr variant
|
|
826
828
|
variants_chr_type_position_list = variants_position_list[chr_a]
|
|
829
|
+
|
|
827
830
|
# judge start and end position
|
|
828
831
|
if start <= variants_chr_type_position_list[-1] and end >= variants_chr_type_position_list[0]:
|
|
829
832
|
# get index
|
|
830
|
-
start_index = get_index(start, variants_chr_type_position_list)
|
|
831
|
-
end_index = get_index(end, variants_chr_type_position_list)
|
|
833
|
+
start_index = get_index(start, variants_chr_type_position_list, False)
|
|
834
|
+
end_index = get_index(end, variants_chr_type_position_list, False)
|
|
832
835
|
|
|
833
836
|
# Determine whether it is equal, Equality means there is no overlap
|
|
834
837
|
if start_index != end_index:
|
|
835
|
-
start_index = start_index if isinstance(start_index,
|
|
836
|
-
end_index = end_index + 1 if isinstance(end_index,
|
|
838
|
+
start_index = start_index if isinstance(start_index, int) else start_index[1]
|
|
839
|
+
end_index = end_index + 1 if isinstance(end_index, int) else end_index[1]
|
|
837
840
|
|
|
838
841
|
if start_index > end_index:
|
|
839
842
|
ul.log(__name__).error("The end index in the region is greater than the start index.")
|
|
@@ -894,13 +897,14 @@ def overlap(regions: DataFrame, variants: DataFrame) -> DataFrame:
|
|
|
894
897
|
return _overlap_(regions_sort, variants)
|
|
895
898
|
|
|
896
899
|
|
|
897
|
-
def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnData:
|
|
900
|
+
def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame, n_jobs: int = -1) -> AnnData:
|
|
898
901
|
"""
|
|
899
902
|
Overlap regional data and mutation data and sum the PP values of all mutations in a region as the values for that
|
|
900
903
|
region.
|
|
901
904
|
:param regions: peaks data
|
|
902
905
|
:param variants: variants data
|
|
903
906
|
:param trait_info: traits information
|
|
907
|
+
:param n_jobs: The maximum number of concurrently running jobs
|
|
904
908
|
:return: overlap data
|
|
905
909
|
"""
|
|
906
910
|
|
|
@@ -916,8 +920,6 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
|
|
|
916
920
|
|
|
917
921
|
trait_names = trait_info["id"].tolist()
|
|
918
922
|
n_trait = len(trait_names)
|
|
919
|
-
# Pre-allocate sparse matrix, fill column by column, then convert to csc and then csr for efficiency
|
|
920
|
-
row_indices, col_indices, data_vals = [], [], []
|
|
921
923
|
|
|
922
924
|
# Check column existence once
|
|
923
925
|
required = {"chr", "start", "end"}
|
|
@@ -941,13 +943,18 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
|
|
|
941
943
|
|
|
942
944
|
ul.log(__name__).info("Obtain peak-trait/disease matrix. (overlap variant information)")
|
|
943
945
|
|
|
944
|
-
#
|
|
945
|
-
|
|
946
|
+
# Function to process a single trait
|
|
947
|
+
def _process_trait_(trait_name, col_idx):
|
|
948
|
+
|
|
949
|
+
local_data_vals = []
|
|
950
|
+
local_row_indices = []
|
|
951
|
+
local_col_indices = []
|
|
952
|
+
|
|
946
953
|
variant: AnnData = variants[trait_name]
|
|
947
954
|
overlap_df: DataFrame = _overlap_(regions_df, variant.obs)
|
|
948
955
|
|
|
949
956
|
if overlap_df.empty:
|
|
950
|
-
|
|
957
|
+
return local_data_vals, local_row_indices, local_col_indices
|
|
951
958
|
|
|
952
959
|
# Sum at once: first group by label and collect variant_id into a list
|
|
953
960
|
label_var_ids = (
|
|
@@ -972,15 +979,37 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
|
|
|
972
979
|
if matrix_sum.size == 1:
|
|
973
980
|
val = float(matrix_sum)
|
|
974
981
|
if val != 0:
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
982
|
+
local_row_indices.append(row_idx)
|
|
983
|
+
local_col_indices.append(col_idx)
|
|
984
|
+
local_data_vals.append(val)
|
|
978
985
|
else:
|
|
979
986
|
for t_idx, v in enumerate(matrix_sum):
|
|
980
987
|
if v != 0:
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
988
|
+
local_row_indices.append(row_idx)
|
|
989
|
+
local_col_indices.append(col_idx + t_idx)
|
|
990
|
+
local_data_vals.append(float(v))
|
|
991
|
+
|
|
992
|
+
return local_data_vals, local_row_indices, local_col_indices
|
|
993
|
+
|
|
994
|
+
# Use Parallel to process traits in parallel
|
|
995
|
+
results = Parallel(n_jobs=n_jobs)(
|
|
996
|
+
delayed(_process_trait_)(trait_name, col_idx) for col_idx, trait_name in enumerate(trait_names)
|
|
997
|
+
)
|
|
998
|
+
|
|
999
|
+
# Preallocate length to avoid list dynamic expansion
|
|
1000
|
+
total = sum(len(ld) for ld, _, _ in results)
|
|
1001
|
+
row_indices = np.empty(total, dtype=np.int32)
|
|
1002
|
+
col_indices = np.empty(total, dtype=np.int32)
|
|
1003
|
+
data_vals = np.empty(total, dtype=np.float32)
|
|
1004
|
+
|
|
1005
|
+
ptr = 0
|
|
1006
|
+
|
|
1007
|
+
for local_data, local_rows, local_cols in results:
|
|
1008
|
+
n = len(local_data)
|
|
1009
|
+
row_indices[ptr:ptr+n] = local_rows
|
|
1010
|
+
col_indices[ptr:ptr+n] = local_cols
|
|
1011
|
+
data_vals[ptr:ptr+n] = local_data
|
|
1012
|
+
ptr += n
|
|
984
1013
|
|
|
985
1014
|
# Build sparse matrix, then convert to csr format
|
|
986
1015
|
overlap_sparse = sparse.csc_matrix(
|
|
@@ -1253,7 +1282,7 @@ def obtain_cell_cell_network(
|
|
|
1253
1282
|
weight: float = 0.1,
|
|
1254
1283
|
kernel: Literal["laplacian", "gaussian"] = "gaussian",
|
|
1255
1284
|
local_k: int = 10,
|
|
1256
|
-
gamma: Optional[float, collection] = None,
|
|
1285
|
+
gamma: Optional[Union[float, collection]] = None,
|
|
1257
1286
|
is_simple: bool = True
|
|
1258
1287
|
) -> AnnData:
|
|
1259
1288
|
"""
|
sciv/util/_core_.py
CHANGED
|
@@ -90,17 +90,20 @@ def sum_min_max(data: matrix_data, axis: int = 1) -> Tuple[number, number]:
|
|
|
90
90
|
return min(rows_sum), max(rows_sum)
|
|
91
91
|
|
|
92
92
|
|
|
93
|
-
def get_index(position: number, positions_list: list) -> Union[
|
|
93
|
+
def get_index(position: number, positions_list: list, is_sort: bool = True) -> Union[int, Tuple[int, int]]:
|
|
94
94
|
"""
|
|
95
95
|
Search for position information. Similar to half search.
|
|
96
96
|
If the position exists in the list, return the index.
|
|
97
97
|
If it does not exist, return the index located between the two indexes
|
|
98
98
|
:param position: position
|
|
99
99
|
:param positions_list: position list
|
|
100
|
+
:param is_sort: whether to sort positions_list in place before searching (default True)
|
|
100
101
|
:return: position index
|
|
101
102
|
"""
|
|
102
|
-
|
|
103
|
-
|
|
103
|
+
|
|
104
|
+
if is_sort:
|
|
105
|
+
positions_list.sort()
|
|
106
|
+
|
|
104
107
|
# search
|
|
105
108
|
position_size: int = len(positions_list)
|
|
106
109
|
left, right = 0, position_size - 1
|
|
@@ -441,7 +444,6 @@ def plot_end(
|
|
|
441
444
|
close: bool = False,
|
|
442
445
|
dpi: float = 300
|
|
443
446
|
):
|
|
444
|
-
|
|
445
447
|
if title is not None:
|
|
446
448
|
plt.title(title)
|
|
447
449
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sciv
|
|
3
|
-
Version: 0.0.96
|
|
3
|
+
Version: 0.0.97
|
|
4
4
|
Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
|
|
5
5
|
Project-URL: github, https://github.com/YuZhengM/sciv
|
|
6
6
|
Author-email: Zheng-Min Yu <yuzmbio@163.com>
|
|
@@ -3,7 +3,7 @@ sciv/file/__init__.py,sha256=8cYLG0S0nilblmyX46CWFrbLr-rmLbO1EEO477pZ-gk,520
|
|
|
3
3
|
sciv/file/_read_.py,sha256=UZJpN3_5hBiTjzEYO6YXORcE_dqA8HmLpV80nqTLNSo,30554
|
|
4
4
|
sciv/file/_write_.py,sha256=W3M9CmPi7BuKAffz1fdi-vA5DzAFZ7wmcggp33N9Xtg,7848
|
|
5
5
|
sciv/model/__init__.py,sha256=k8SO9FpJaGn2ANqJyaz3HXMas7jH9toPVtpw703kOqg,149
|
|
6
|
-
sciv/model/_core_.py,sha256=
|
|
6
|
+
sciv/model/_core_.py,sha256=rKIEhVz67HM7hXkEUpUR9l7uyIK72jK1zANPVpB_Hcc,33277
|
|
7
7
|
sciv/plot/__init__.py,sha256=2tRNT6TZNz9r38lnna712RGsH7OJ2QkGa37XKgzejHQ,1865
|
|
8
8
|
sciv/plot/_bar_.py,sha256=xWpFbJTHgQMLuUSXa4uE69RGHXSCGinapxL-1imdDcU,14355
|
|
9
9
|
sciv/plot/_barcode_.py,sha256=RDOedQ8ZtXWFyJ2c772RDfqO4TMIpHMvcMZMAVqky90,5073
|
|
@@ -27,13 +27,13 @@ sciv/preprocessing/_scanpy_.py,sha256=tB8BD2wpLAU8_YxdqrgNtcjpNXNRo-JCdm2lxaKDBL
|
|
|
27
27
|
sciv/preprocessing/_scvi_.py,sha256=7QxwPA2kR_g15X28aEak7AFA4kyQ-UbtpiLH-rc5Ksg,10780
|
|
28
28
|
sciv/preprocessing/_snapatac_.py,sha256=Dq8CHF7Psl3CQszaEokQYO56Oe2uzyWOy_cGlaOywfc,27798
|
|
29
29
|
sciv/tool/__init__.py,sha256=WXzHkWt6RgBC3qqD-98nR5wQmt6oC850ox_VpMrapSU,2468
|
|
30
|
-
sciv/tool/_algorithm_.py,sha256=
|
|
30
|
+
sciv/tool/_algorithm_.py,sha256=BzUUBn22R12PoQHSjhcKZfj6Yw01Qh_VIgl8RXgWvfY,53580
|
|
31
31
|
sciv/tool/_matrix_.py,sha256=SnC3sXic_ufuEXStcD_HncvYH6apBdNK6nhG6jFLmjA,24324
|
|
32
32
|
sciv/tool/_random_walk_.py,sha256=JOB97XLxlZYHvlIST1wlXgA0mw6fybkWnJGq6X_kbsk,48871
|
|
33
33
|
sciv/util/__init__.py,sha256=nOxZ8if27X7AUJ6hZwTwxOJwIBJb0obWlHjqCzjg_Gc,1964
|
|
34
34
|
sciv/util/_constant_.py,sha256=w0wKQd8guLd1ZTW24_5aECrWsIWDiNQmEpLsWlHar1A,3000
|
|
35
|
-
sciv/util/_core_.py,sha256=
|
|
36
|
-
sciv-0.0.
|
|
37
|
-
sciv-0.0.
|
|
38
|
-
sciv-0.0.
|
|
39
|
-
sciv-0.0.
|
|
35
|
+
sciv/util/_core_.py,sha256=TUWfBNRJzWuoQ9ffew_DjnlkNydG-Rmujl_RH4Ln9io,14917
|
|
36
|
+
sciv-0.0.97.dist-info/METADATA,sha256=lCXJ0ySEYDpLmHDMuR7FXcg5bEKA_THBDG1aCqH7Siw,3465
|
|
37
|
+
sciv-0.0.97.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
38
|
+
sciv-0.0.97.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
|
|
39
|
+
sciv-0.0.97.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|