sciv 0.0.94__py3-none-any.whl → 0.0.96__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sciv/model/_core_.py +268 -640
- sciv/plot/_bar_.py +2 -1
- sciv/plot/_scatter_.py +21 -14
- sciv/preprocessing/_scanpy_.py +7 -1
- sciv/preprocessing/_scvi_.py +6 -0
- sciv/tool/_algorithm_.py +81 -10
- sciv/tool/_matrix_.py +1 -0
- sciv/tool/_random_walk_.py +56 -26
- {sciv-0.0.94.dist-info → sciv-0.0.96.dist-info}/METADATA +1 -1
- {sciv-0.0.94.dist-info → sciv-0.0.96.dist-info}/RECORD +12 -12
- {sciv-0.0.94.dist-info → sciv-0.0.96.dist-info}/WHEEL +0 -0
- {sciv-0.0.94.dist-info → sciv-0.0.96.dist-info}/licenses/LICENSE +0 -0
sciv/plot/_bar_.py
CHANGED
|
@@ -302,7 +302,8 @@ def bar_significance(
|
|
|
302
302
|
:param color_step_size: Step size when cycling through palette
|
|
303
303
|
:param color_type: Name of seaborn palette to use
|
|
304
304
|
:param test: Statistical test for pairwise comparisons
|
|
305
|
-
{"t-test_ind", "t-test_welch", "t-test_paired", "Mann-Whitney", "Mann-Whitney-gt", "Mann-Whitney-ls",
|
|
305
|
+
{"t-test_ind", "t-test_welch", "t-test_paired", "Mann-Whitney", "Mann-Whitney-gt", "Mann-Whitney-ls",
|
|
306
|
+
"Levene", "Wilcoxon", "Kruskal", "Brunner-Munzel"}
|
|
306
307
|
:param ci: Confidence interval type or value
|
|
307
308
|
:param capsize: Width of the error-bar caps
|
|
308
309
|
:param errcolor: Color of the error bars
|
sciv/plot/_scatter_.py
CHANGED
|
@@ -8,6 +8,7 @@ import numpy as np
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
from anndata import AnnData
|
|
10
10
|
from matplotlib import pyplot as plt
|
|
11
|
+
from matplotlib.colors import ListedColormap
|
|
11
12
|
from pandas import DataFrame
|
|
12
13
|
import seaborn as sns
|
|
13
14
|
|
|
@@ -165,8 +166,10 @@ def scatter_3d(
|
|
|
165
166
|
title: str = None,
|
|
166
167
|
width: float = 7,
|
|
167
168
|
height: float = 7,
|
|
168
|
-
|
|
169
|
-
|
|
169
|
+
elev: float = 30,
|
|
170
|
+
azim: float = -60,
|
|
171
|
+
is_add_legend: bool = True,
|
|
172
|
+
cmap: Union[str, ListedColormap] = 'tab20',
|
|
170
173
|
font_size: int = 14,
|
|
171
174
|
edge_color: str = None,
|
|
172
175
|
size: Union[float, collection] = 0.1,
|
|
@@ -180,8 +183,8 @@ def scatter_3d(
|
|
|
180
183
|
ul.log(__name__).error(f"At least one of the `output` and `show` parameters is required")
|
|
181
184
|
raise ValueError(f"At least one of the `output` and `show` parameters is required")
|
|
182
185
|
|
|
183
|
-
fig
|
|
184
|
-
fig.
|
|
186
|
+
fig = plt.figure(figsize=(width, height))
|
|
187
|
+
ax = fig.add_subplot(projection='3d')
|
|
185
188
|
|
|
186
189
|
hue_cat = pd.Categorical(df[hue])
|
|
187
190
|
|
|
@@ -196,6 +199,9 @@ def scatter_3d(
|
|
|
196
199
|
**kwargs
|
|
197
200
|
)
|
|
198
201
|
|
|
202
|
+
# angle of view
|
|
203
|
+
ax.view_init(elev=elev, azim=azim)
|
|
204
|
+
|
|
199
205
|
if x_name is not None:
|
|
200
206
|
ax.set_xlabel(x_name, fontsize=font_size)
|
|
201
207
|
|
|
@@ -208,16 +214,17 @@ def scatter_3d(
|
|
|
208
214
|
if title is not None:
|
|
209
215
|
ax.set_title(title, fontsize=font_size)
|
|
210
216
|
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
217
|
+
if is_add_legend:
|
|
218
|
+
unique_types = hue_cat.categories
|
|
219
|
+
legend_elements = [
|
|
220
|
+
plt.Line2D(
|
|
221
|
+
[0], [0], marker='o', color='w', label=type_,
|
|
222
|
+
markerfacecolor=scatter.cmap(scatter.norm(i))
|
|
223
|
+
)
|
|
224
|
+
for i, type_ in enumerate(unique_types)
|
|
225
|
+
]
|
|
226
|
+
|
|
227
|
+
ax.legend(handles=legend_elements, title=legend_name, loc='upper left')
|
|
221
228
|
|
|
222
229
|
plot_end(fig, None, None, None, output, show, close)
|
|
223
230
|
|
sciv/preprocessing/_scanpy_.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# -*- coding: UTF-8 -*-
|
|
2
|
-
|
|
2
|
+
import time
|
|
3
3
|
import warnings
|
|
4
4
|
from typing import Optional, Literal
|
|
5
5
|
|
|
@@ -45,6 +45,10 @@ def filter_data(
|
|
|
45
45
|
:param is_min_peak: Whether to screen peaks
|
|
46
46
|
:return: scATAC-seq data
|
|
47
47
|
"""
|
|
48
|
+
|
|
49
|
+
# start time
|
|
50
|
+
start_time = time.time()
|
|
51
|
+
|
|
48
52
|
import scanpy as sc
|
|
49
53
|
|
|
50
54
|
ul.log(__name__).info("Filter scATAC-seq data")
|
|
@@ -105,6 +109,8 @@ def filter_data(
|
|
|
105
109
|
)
|
|
106
110
|
ul.log(__name__).info(f"Size of filtered scATAC-seq data: {filter_adata.shape}")
|
|
107
111
|
filter_adata.uns["step"] = 0
|
|
112
|
+
filter_adata.uns["elapsed_time"] = time.time() - start_time
|
|
113
|
+
|
|
108
114
|
return filter_adata
|
|
109
115
|
|
|
110
116
|
|
sciv/preprocessing/_scvi_.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# -*- coding: UTF-8 -*-
|
|
2
2
|
|
|
3
3
|
import os
|
|
4
|
+
import time
|
|
4
5
|
import warnings
|
|
5
6
|
from typing import Optional
|
|
6
7
|
|
|
@@ -49,6 +50,8 @@ def poisson_vi(
|
|
|
49
50
|
"""
|
|
50
51
|
ul.log(__name__).info("Start PoissonVI")
|
|
51
52
|
|
|
53
|
+
start_time = time.time()
|
|
54
|
+
|
|
52
55
|
import scvi
|
|
53
56
|
import scanpy as sc
|
|
54
57
|
|
|
@@ -232,11 +235,14 @@ def poisson_vi(
|
|
|
232
235
|
|
|
233
236
|
obs = pd.DataFrame(clusters_list, columns=["id"])
|
|
234
237
|
obs.index = obs["id"].astype(str)
|
|
238
|
+
|
|
235
239
|
da_peaks_adata = AnnData(matrix_ee, obs=obs, var=adata.var)
|
|
236
240
|
da_peaks_adata.layers["bayes_factor"] = matrix_bf
|
|
237
241
|
da_peaks_adata.layers["emp_prob1"] = matrix_ep1
|
|
238
242
|
da_peaks_adata.uns["latent_name"] = latent_name
|
|
239
243
|
da_peaks_adata.uns["dp_delta"] = dp_delta
|
|
244
|
+
da_peaks_adata.uns["elapsed_time"] = time.time() - start_time
|
|
240
245
|
|
|
241
246
|
adata.uns["step"] = 1
|
|
247
|
+
|
|
242
248
|
return da_peaks_adata
|
sciv/tool/_algorithm_.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# -*- coding: UTF-8 -*-
|
|
2
2
|
|
|
3
3
|
import random
|
|
4
|
+
import time
|
|
4
5
|
from typing import Union, Tuple, Literal, Optional
|
|
5
6
|
|
|
6
7
|
from scipy import sparse
|
|
@@ -903,6 +904,8 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
|
|
|
903
904
|
:return: overlap data
|
|
904
905
|
"""
|
|
905
906
|
|
|
907
|
+
start_time = time.time()
|
|
908
|
+
|
|
906
909
|
# Unique feature set
|
|
907
910
|
label_all = regions.var.index.tolist()
|
|
908
911
|
# Peak number
|
|
@@ -988,6 +991,8 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
|
|
|
988
991
|
|
|
989
992
|
overlap_adata = AnnData(overlap_sparse, var=trait_info, obs=regions.var)
|
|
990
993
|
overlap_adata.uns["is_overlap"] = True
|
|
994
|
+
overlap_adata.uns["elapsed_time"] = time.time() - start_time
|
|
995
|
+
|
|
991
996
|
return overlap_adata
|
|
992
997
|
|
|
993
998
|
|
|
@@ -1069,6 +1074,7 @@ def calculate_init_score_weight(
|
|
|
1069
1074
|
adata: AnnData,
|
|
1070
1075
|
da_peaks_adata: AnnData,
|
|
1071
1076
|
overlap_adata: AnnData,
|
|
1077
|
+
layer: Optional[str] = "fragments",
|
|
1072
1078
|
diff_peak_value: difference_peak_optional = 'emp_effect',
|
|
1073
1079
|
is_simple: bool = True,
|
|
1074
1080
|
block_size: int = -1
|
|
@@ -1078,6 +1084,7 @@ def calculate_init_score_weight(
|
|
|
1078
1084
|
:param adata: scATAC-seq data;
|
|
1079
1085
|
:param da_peaks_adata: Differential peak data;
|
|
1080
1086
|
:param overlap_adata: Peaks-traits/diseases data;
|
|
1087
|
+
:param layer: The layer value of scATAC-seq data;
|
|
1081
1088
|
:param diff_peak_value: Specify the correction value in peak correction of clustering type differences.
|
|
1082
1089
|
{'emp_effect', 'bayes_factor', 'emp_prob1', 'all'}
|
|
1083
1090
|
:param is_simple: True represents not adding unnecessary intermediate variables, only adding the final result. It
|
|
@@ -1087,17 +1094,30 @@ def calculate_init_score_weight(
|
|
|
1087
1094
|
If the value is less than or equal to zero, no block operation will be performed
|
|
1088
1095
|
:return: Initial TRS with weight.
|
|
1089
1096
|
"""
|
|
1097
|
+
|
|
1098
|
+
start_time = time.time()
|
|
1099
|
+
|
|
1090
1100
|
if "is_overlap" not in overlap_adata.uns:
|
|
1091
1101
|
ul.log(__name__).warning(
|
|
1092
|
-
"The `is_overlap` is not in `overlap_data.uns`.
|
|
1102
|
+
"The `is_overlap` is not in `overlap_adata.uns`. "
|
|
1103
|
+
"(Suggest using the 'tl.overlap_sum' function to obtain the result.)"
|
|
1093
1104
|
)
|
|
1094
1105
|
|
|
1095
1106
|
if "dp_delta" not in da_peaks_adata.uns:
|
|
1096
1107
|
ul.log(__name__).warning(
|
|
1097
|
-
"The `dp_delta` is not in `da_peaks_adata.uns`.
|
|
1108
|
+
"The `dp_delta` is not in `da_peaks_adata.uns`. "
|
|
1109
|
+
"(Suggest using the 'pp.poisson_vi' function to obtain the result.)"
|
|
1098
1110
|
)
|
|
1099
1111
|
|
|
1100
|
-
|
|
1112
|
+
if layer is not None and layer not in adata.layers:
|
|
1113
|
+
ul.log(__name__).error(
|
|
1114
|
+
f"The `layer` parameter must be None or one of the keys of `adata.layers` ({adata.layers})."
|
|
1115
|
+
)
|
|
1116
|
+
raise ValueError(
|
|
1117
|
+
f"The `layer` parameter must be None or one of the keys of `adata.layers` ({adata.layers})."
|
|
1118
|
+
)
|
|
1119
|
+
|
|
1120
|
+
fragments = adata.layers[layer] if layer is not None else adata.X
|
|
1101
1121
|
cell_anno = adata.obs
|
|
1102
1122
|
del adata
|
|
1103
1123
|
|
|
@@ -1174,7 +1194,8 @@ def calculate_init_score_weight(
|
|
|
1174
1194
|
del _cluster_weight_
|
|
1175
1195
|
|
|
1176
1196
|
ul.log(__name__).info("Broadcasting the weight factor to the cellular level")
|
|
1177
|
-
_cell_type_weight_ = np.zeros((cell_anno.shape[0], da_peaks_adata.obsm["cluster_weight"].shape[1]),
|
|
1197
|
+
_cell_type_weight_ = np.zeros((cell_anno.shape[0], da_peaks_adata.obsm["cluster_weight"].shape[1]),
|
|
1198
|
+
dtype=np.float32)
|
|
1178
1199
|
|
|
1179
1200
|
cluster_series = cell_anno["clusters"]
|
|
1180
1201
|
|
|
@@ -1195,15 +1216,44 @@ def calculate_init_score_weight(
|
|
|
1195
1216
|
del _init_trs_ncw_, _cell_type_weight_
|
|
1196
1217
|
|
|
1197
1218
|
init_trs_adata.uns["is_sample"] = is_simple
|
|
1219
|
+
init_trs_adata.uns["elapsed_time"] = time.time() - start_time
|
|
1198
1220
|
return init_trs_adata
|
|
1199
1221
|
|
|
1200
1222
|
|
|
1223
|
+
def adaptive_gamma_knn(data: matrix_data, k: int = 10):
|
|
1224
|
+
"""
|
|
1225
|
+
Adaptive gamma parameter based on k-nearest neighbors
|
|
1226
|
+
:param data: Data matrix (n_samples, n_features)
|
|
1227
|
+
:param k: Number of neighbors, usually select 5-20
|
|
1228
|
+
:return: Gamma value for each sample (n_samples,)
|
|
1229
|
+
"""
|
|
1230
|
+
|
|
1231
|
+
from sklearn.neighbors import NearestNeighbors
|
|
1232
|
+
|
|
1233
|
+
# Calculate the distance from each point to its k-th nearest neighbor
|
|
1234
|
+
knn = NearestNeighbors(n_neighbors=k + 1).fit(data) # +1 because it includes itself
|
|
1235
|
+
distances, _ = knn.kneighbors(data)
|
|
1236
|
+
|
|
1237
|
+
# Take the distance of the k-th nearest neighbor (index k, because 0 is itself)
|
|
1238
|
+
kth_distances = distances[:, k]
|
|
1239
|
+
|
|
1240
|
+
# Avoid division by zero (if the distance is 0, set it to a very small value)
|
|
1241
|
+
kth_distances[kth_distances == 0] = np.finfo(float).eps
|
|
1242
|
+
|
|
1243
|
+
# Calculate local gamma: gamma = 1 / sigma^2, where sigma = kth_distance (no factor of 2 is applied)
|
|
1244
|
+
gammas = 1.0 / (kth_distances ** 2)
|
|
1245
|
+
|
|
1246
|
+
return gammas
|
|
1247
|
+
|
|
1248
|
+
|
|
1201
1249
|
def obtain_cell_cell_network(
|
|
1202
1250
|
adata: AnnData,
|
|
1203
1251
|
k: int = 30,
|
|
1204
1252
|
or_k: int = 1,
|
|
1205
1253
|
weight: float = 0.1,
|
|
1206
|
-
|
|
1254
|
+
kernel: Literal["laplacian", "gaussian"] = "gaussian",
|
|
1255
|
+
local_k: int = 10,
|
|
1256
|
+
gamma: Optional[Union[float, collection]] = None,
|
|
1207
1257
|
is_simple: bool = True
|
|
1208
1258
|
) -> AnnData:
|
|
1209
1259
|
"""
|
|
@@ -1212,14 +1262,19 @@ def obtain_cell_cell_network(
|
|
|
1212
1262
|
:param k: When building an M-KNN network, the number of nodes connected by each node (and);
|
|
1213
1263
|
:param or_k: When building an M-KNN network, the number of nodes connected by each node (or);
|
|
1214
1264
|
:param weight: The weight of interactions or operations;
|
|
1215
|
-
:param
|
|
1265
|
+
:param local_k: Determining the number of neighbors for the adaptive kernel;
|
|
1266
|
+
:param kernel: Determine the kernel function to be used;
|
|
1267
|
+
:param gamma: If None, it defaults to the adaptive value obtained through the local information of
|
|
1268
|
+
parameter `local_k`. Otherwise, it should be strictly positive;
|
|
1216
1269
|
:param is_simple: True represents not adding unnecessary intermediate variables, only adding the final result.
|
|
1217
1270
|
It is worth noting that when set to `True`, the `is_ablation` parameter will become invalid, and when set to
|
|
1218
1271
|
`False`, `is_ablation` will only take effect;
|
|
1219
1272
|
:return: Cell similarity data.
|
|
1220
1273
|
"""
|
|
1221
1274
|
|
|
1222
|
-
|
|
1275
|
+
start_time = time.time()
|
|
1276
|
+
|
|
1277
|
+
from sklearn.metrics.pairwise import laplacian_kernel, rbf_kernel
|
|
1223
1278
|
|
|
1224
1279
|
# data
|
|
1225
1280
|
if "poisson_vi" not in adata.uns.keys():
|
|
@@ -1232,6 +1287,14 @@ def obtain_cell_cell_network(
|
|
|
1232
1287
|
"the `poisson_vi` function."
|
|
1233
1288
|
)
|
|
1234
1289
|
|
|
1290
|
+
if kernel not in ["laplacian", "gaussian"]:
|
|
1291
|
+
ul.log(__name__).error("Parameter `kernel` only supports two values, `laplacian` and `gaussian`.")
|
|
1292
|
+
raise ValueError("Parameter `kernel` only supports two values, `laplacian` and `gaussian`.")
|
|
1293
|
+
|
|
1294
|
+
if local_k <= 0:
|
|
1295
|
+
ul.log(__name__).error("The `local_k` parameter must be a natural number greater than 0.")
|
|
1296
|
+
raise ValueError("The `local_k` parameter must be a natural number greater than 0.")
|
|
1297
|
+
|
|
1235
1298
|
_latent_name_ = "latent" if adata.uns["poisson_vi"]["latent_name"] is None \
|
|
1236
1299
|
else adata.uns["poisson_vi"]["latent_name"]
|
|
1237
1300
|
|
|
@@ -1240,9 +1303,15 @@ def obtain_cell_cell_network(
|
|
|
1240
1303
|
cell_anno = adata.obs
|
|
1241
1304
|
del adata
|
|
1242
1305
|
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1306
|
+
if gamma is None:
|
|
1307
|
+
gamma = adaptive_gamma_knn(latent, k=local_k)
|
|
1308
|
+
|
|
1309
|
+
if kernel == "laplacian":
|
|
1310
|
+
ul.log(__name__).info("Laplacian kernel")
|
|
1311
|
+
cell_affinity = laplacian_kernel(latent, gamma=gamma).astype(np.float32)
|
|
1312
|
+
else:
|
|
1313
|
+
ul.log(__name__).info("Gaussian (RBF) kernel")
|
|
1314
|
+
cell_affinity = rbf_kernel(latent, gamma=gamma).astype(np.float32)
|
|
1246
1315
|
|
|
1247
1316
|
# Define KNN network
|
|
1248
1317
|
cell_mutual_knn_weight, cell_mutual_knn = semi_mutual_knn_weight(
|
|
@@ -1262,6 +1331,8 @@ def obtain_cell_cell_network(
|
|
|
1262
1331
|
if not is_simple:
|
|
1263
1332
|
cc_data.layers["cell_mutual_knn"] = to_sparse(cell_mutual_knn)
|
|
1264
1333
|
|
|
1334
|
+
cc_data.uns["elapsed_time"] = time.time() - start_time
|
|
1335
|
+
|
|
1265
1336
|
return cc_data
|
|
1266
1337
|
|
|
1267
1338
|
|
sciv/tool/_matrix_.py
CHANGED
sciv/tool/_random_walk_.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# -*- coding: UTF-8 -*-
|
|
2
2
|
|
|
3
3
|
import math
|
|
4
|
+
import time
|
|
4
5
|
from typing import Union, Tuple, Literal
|
|
5
6
|
|
|
6
7
|
import torch
|
|
@@ -222,7 +223,9 @@ def random_walk(
|
|
|
222
223
|
|
|
223
224
|
return np.column_stack(results)
|
|
224
225
|
else:
|
|
225
|
-
ul.log(__name__).error(
|
|
226
|
+
ul.log(__name__).error(
|
|
227
|
+
f'The `device` ({device}) is not supported. Only supports "cpu", "gpu", and "auto" values.'
|
|
228
|
+
)
|
|
226
229
|
raise ValueError(f'The `device` ({device}) is not supported. Only supports "cpu", "gpu", and "auto" values.')
|
|
227
230
|
|
|
228
231
|
|
|
@@ -267,17 +270,20 @@ class RandomWalk:
|
|
|
267
270
|
:param max_seed_cell_rate: The maximum percentage of seed cells in all cells.
|
|
268
271
|
:param credible_threshold: The threshold for determining the credibility of enriched cells in the context of
|
|
269
272
|
enrichment, i.e. the threshold for judging enriched cells;
|
|
270
|
-
:param enrichment_threshold: Only by setting a threshold for the standardized output TRS can a portion of the
|
|
271
|
-
results be obtained. Parameters support string types {'golden', 'half', 'e', 'pi', 'none'}, or
|
|
272
|
-
within the range of (0, log1p(1)).
|
|
273
|
-
:param is_ablation: True represents obtaining the results of the ablation experiment. This parameter is limited
|
|
274
|
-
the `is_simple` parameter, and its effectiveness requires setting `is_simple` to `False`;
|
|
273
|
+
:param enrichment_threshold: Only by setting a threshold for the standardized output TRS can a portion of the
|
|
274
|
+
enrichment results be obtained. Parameters support string types {'golden', 'half', 'e', 'pi', 'none'}, or
|
|
275
|
+
valid floating-point types within the range of (0, log1p(1)).
|
|
276
|
+
:param is_ablation: True represents obtaining the results of the ablation experiment. This parameter is limited
|
|
277
|
+
by the `is_simple` parameter, and its effectiveness requires setting `is_simple` to `False`;
|
|
275
278
|
:param is_simple: True represents not adding unnecessary intermediate variables, only adding the final result.
|
|
276
|
-
It is worth noting that when set to `True`, the `is_ablation` parameter will become invalid, and when set
|
|
277
|
-
`False`, `is_ablation` will only take effect;
|
|
279
|
+
It is worth noting that when set to `True`, the `is_ablation` parameter will become invalid, and when set
|
|
280
|
+
to `False`, `is_ablation` will only take effect;
|
|
278
281
|
:return: Stable distribution score.
|
|
279
282
|
"""
|
|
280
283
|
ul.log(__name__).info("Random walk.")
|
|
284
|
+
|
|
285
|
+
start_time = time.time()
|
|
286
|
+
|
|
281
287
|
# judge length
|
|
282
288
|
if cc_adata.shape[0] != init_status.shape[0]:
|
|
283
289
|
ul.log(__name__).error(
|
|
@@ -311,8 +317,12 @@ class RandomWalk:
|
|
|
311
317
|
raise ValueError("The parameter of `epsilon` must be greater than zero.")
|
|
312
318
|
|
|
313
319
|
if "clusters" not in init_status.obs.columns:
|
|
314
|
-
ul.log(__name__).error(
|
|
315
|
-
|
|
320
|
+
ul.log(__name__).error(
|
|
321
|
+
"Unsupervised clustering information must be included in column `clusters` of `init_status.obs`."
|
|
322
|
+
)
|
|
323
|
+
raise ValueError(
|
|
324
|
+
"Unsupervised clustering information must be included in column `clusters` of `init_status.obs`."
|
|
325
|
+
)
|
|
316
326
|
|
|
317
327
|
init_status.obs["clusters"] = init_status.obs["clusters"].astype(str)
|
|
318
328
|
|
|
@@ -339,8 +349,13 @@ class RandomWalk:
|
|
|
339
349
|
if isinstance(enrichment_threshold, float):
|
|
340
350
|
|
|
341
351
|
if enrichment_threshold <= 0 or enrichment_threshold >= np.log1p(1):
|
|
342
|
-
ul.log(__name__).warning(
|
|
343
|
-
|
|
352
|
+
ul.log(__name__).warning(
|
|
353
|
+
"The `enrichment_threshold` parameter is not set within the range of (0, log1p(1)), "
|
|
354
|
+
"this parameter will become invalid."
|
|
355
|
+
)
|
|
356
|
+
ul.log(__name__).warning(
|
|
357
|
+
"It is recommended to set the `enrichment_threshold` parameter to the 'golden' value."
|
|
358
|
+
)
|
|
344
359
|
|
|
345
360
|
self.enrichment_threshold = enrichment_threshold
|
|
346
361
|
elif enrichment_threshold == "golden":
|
|
@@ -356,8 +371,9 @@ class RandomWalk:
|
|
|
356
371
|
self.enrichment_threshold = np.log1p(1)
|
|
357
372
|
else:
|
|
358
373
|
raise ValueError(
|
|
359
|
-
|
|
360
|
-
|
|
374
|
+
"Invalid enrichment settings. The string type in the `enrichment_threshold` parameter only supports "
|
|
375
|
+
"the following values: 'golden', 'half', 'e', 'pi'. Alternatively, input a floating-point type "
|
|
376
|
+
"value within the range of (0, log1p(1))"
|
|
361
377
|
)
|
|
362
378
|
|
|
363
379
|
# Enrichment judgment
|
|
@@ -443,6 +459,8 @@ class RandomWalk:
|
|
|
443
459
|
del self.cell_affinity
|
|
444
460
|
del init_status
|
|
445
461
|
|
|
462
|
+
self.elapsed_time = time.time() - start_time
|
|
463
|
+
|
|
446
464
|
def _random_walk_(
|
|
447
465
|
self,
|
|
448
466
|
seed_cell_data: matrix_data,
|
|
@@ -465,13 +483,19 @@ class RandomWalk:
|
|
|
465
483
|
w = weight
|
|
466
484
|
|
|
467
485
|
if not self.is_gpu_available:
|
|
468
|
-
return random_walk(
|
|
486
|
+
return random_walk(
|
|
487
|
+
seed_cell_data, weight=w, gamma=gamma, epsilon=self.epsilon, p=self.p, n_jobs=self.n_jobs, device='cpu'
|
|
488
|
+
)
|
|
469
489
|
|
|
470
490
|
try:
|
|
471
|
-
_data_ = random_walk(
|
|
491
|
+
_data_ = random_walk(
|
|
492
|
+
seed_cell_data, weight=w, gamma=gamma, epsilon=self.epsilon, p=self.p, n_jobs=self.n_jobs, device=device
|
|
493
|
+
)
|
|
472
494
|
except Exception as e:
|
|
473
495
|
ul.log(__name__).warning(f"GPU failed to run, try to switch to CPU running.\n {e}")
|
|
474
|
-
_data_ = random_walk(
|
|
496
|
+
_data_ = random_walk(
|
|
497
|
+
seed_cell_data, weight=w, gamma=gamma, epsilon=self.epsilon, p=self.p, n_jobs=self.n_jobs, device='cpu'
|
|
498
|
+
)
|
|
475
499
|
|
|
476
500
|
return _data_
|
|
477
501
|
|
|
@@ -548,8 +572,9 @@ class RandomWalk:
|
|
|
548
572
|
|
|
549
573
|
def _get_seed_cell_clustering_weight_(self, seed_cell_index: collection) -> Tuple[collection, dict]:
|
|
550
574
|
"""
|
|
551
|
-
This function is used to obtain the percentage of seed cells that occupy this cell type, i.e., the seed cell
|
|
552
|
-
The purpose of this weight is to provide fair enrichment opportunities for those with fewer
|
|
575
|
+
This function is used to obtain the percentage of seed cells that occupy this cell type, i.e., the seed cell
|
|
576
|
+
clustering weight. The purpose of this weight is to provide fair enrichment opportunities for those with fewer
|
|
577
|
+
cell numbers in cell clustering types.
|
|
553
578
|
:param seed_cell_index: Index of seed cells.
|
|
554
579
|
:return: The seed cell clustering weight, equity factor.
|
|
555
580
|
"""
|
|
@@ -604,14 +629,17 @@ class RandomWalk:
|
|
|
604
629
|
:param info: Log information about seed cells
|
|
605
630
|
:return:
|
|
606
631
|
1. Set seed cell thresholds for each trait or disease.
|
|
607
|
-
2. Seed cell weights obtained for each trait or disease based on the `init_data` parameter, with each seed
|
|
608
|
-
Note that this only takes effect when `is_simple` is true.
|
|
609
|
-
3. Seed cell weights obtained for each trait or disease based on the init_data parameter, and the weight of
|
|
632
|
+
2. Seed cell weights obtained for each trait or disease based on the `init_data` parameter, with each seed
|
|
633
|
+
cell assigned the same weight. Note that this only takes effect when `is_simple` is true.
|
|
634
|
+
3. Seed cell weights obtained for each trait or disease based on the init_data parameter, and the weight of
|
|
635
|
+
each seed cell will be assigned based on the similarity between cells.
|
|
610
636
|
4. Seed cell index, which will be used for later knockout or knockdown prediction.
|
|
611
|
-
5. Based on the init_data parameter, a reference seed cell weight is obtained for enrichment analysis
|
|
637
|
+
5. Based on the init_data parameter, a reference seed cell weight is obtained for enrichment analysis
|
|
638
|
+
assistance for each trait or disease, and each seed cell is assigned the same weight.
|
|
612
639
|
Note that this only takes effect when `is_simple` is true.
|
|
613
|
-
6. Reference seed cell weights for auxiliary enrichment analysis of each trait or disease based on the
|
|
614
|
-
and the weight of each seed cell will be assigned based on the similarity
|
|
640
|
+
6. Reference seed cell weights for auxiliary enrichment analysis of each trait or disease based on the
|
|
641
|
+
init_data parameter, and the weight of each seed cell will be assigned based on the similarity
|
|
642
|
+
between cells.
|
|
615
643
|
"""
|
|
616
644
|
|
|
617
645
|
if init_data is None:
|
|
@@ -633,7 +661,9 @@ class RandomWalk:
|
|
|
633
661
|
seed_cell_matrix = np.zeros((1, 1))
|
|
634
662
|
seed_cell_matrix_en = np.zeros((1, 1))
|
|
635
663
|
|
|
636
|
-
ul.log(__name__).info(
|
|
664
|
+
ul.log(__name__).info(
|
|
665
|
+
f"Calculate {n_traits} traits/diseases for seed cells information.{f' ({info})' if info else ''}"
|
|
666
|
+
)
|
|
637
667
|
|
|
638
668
|
trait_values_all = to_dense(init_data.X, is_array=True)
|
|
639
669
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sciv
|
|
3
|
-
Version: 0.0.94
|
|
3
|
+
Version: 0.0.96
|
|
4
4
|
Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
|
|
5
5
|
Project-URL: github, https://github.com/YuZhengM/sciv
|
|
6
6
|
Author-email: Zheng-Min Yu <yuzmbio@163.com>
|
|
@@ -3,9 +3,9 @@ sciv/file/__init__.py,sha256=8cYLG0S0nilblmyX46CWFrbLr-rmLbO1EEO477pZ-gk,520
|
|
|
3
3
|
sciv/file/_read_.py,sha256=UZJpN3_5hBiTjzEYO6YXORcE_dqA8HmLpV80nqTLNSo,30554
|
|
4
4
|
sciv/file/_write_.py,sha256=W3M9CmPi7BuKAffz1fdi-vA5DzAFZ7wmcggp33N9Xtg,7848
|
|
5
5
|
sciv/model/__init__.py,sha256=k8SO9FpJaGn2ANqJyaz3HXMas7jH9toPVtpw703kOqg,149
|
|
6
|
-
sciv/model/_core_.py,sha256=
|
|
6
|
+
sciv/model/_core_.py,sha256=3GjKG5w-cTBF53LpSrFrMFnqwtgMI2_BkSlLGFMMMj8,33255
|
|
7
7
|
sciv/plot/__init__.py,sha256=2tRNT6TZNz9r38lnna712RGsH7OJ2QkGa37XKgzejHQ,1865
|
|
8
|
-
sciv/plot/_bar_.py,sha256=
|
|
8
|
+
sciv/plot/_bar_.py,sha256=xWpFbJTHgQMLuUSXa4uE69RGHXSCGinapxL-1imdDcU,14355
|
|
9
9
|
sciv/plot/_barcode_.py,sha256=RDOedQ8ZtXWFyJ2c772RDfqO4TMIpHMvcMZMAVqky90,5073
|
|
10
10
|
sciv/plot/_box_.py,sha256=485YNmSBj2IB7JlRHaKHMVj0yr0Pz02B3-r57SJqtMk,6009
|
|
11
11
|
sciv/plot/_bubble_.py,sha256=vwBs-voOdkBfyMho5w4Rt68b8_Dw1C3NClLdauonC4U,1066
|
|
@@ -16,24 +16,24 @@ sciv/plot/_kde_.py,sha256=9VC6DKHbQAzKenz4DoiL5LgUzEJBtAWXXPCLxOV74kY,2459
|
|
|
16
16
|
sciv/plot/_line_.py,sha256=P944YoGxSocHncjtme4n_iIPK9sghNW3-Xee6Tyy4HE,3850
|
|
17
17
|
sciv/plot/_pie_.py,sha256=OVPv85MdjkjABh-uP5Y-KVk1VygFIzz87QT3OE1hZjU,6399
|
|
18
18
|
sciv/plot/_radar_.py,sha256=g_abzmzibJIR6it59TendUI236inbzYl0IzzdoA3Uuc,6304
|
|
19
|
-
sciv/plot/_scatter_.py,sha256=
|
|
19
|
+
sciv/plot/_scatter_.py,sha256=HPy1eFqDk4vksAiKjn_Qk00fYO4GyqRs1O3ZZh44Ezw,16998
|
|
20
20
|
sciv/plot/_venn_.py,sha256=TfNTuxog2pT7sicKBEEMtleoHXwnenbl3CMWJu9c2vs,2675
|
|
21
21
|
sciv/plot/_violin_.py,sha256=40LYeHFYyNL1XbKHTjmwLT0zzh7o5emqN9Gl2sbl-DA,6458
|
|
22
22
|
sciv/preprocessing/__init__.py,sha256=56RgDai5I3sZ4hl3aaV80ogOeUscYsU3nJUWE80jZ-k,742
|
|
23
23
|
sciv/preprocessing/_anndata_.py,sha256=3d1cHFs1YA9UZkIPE089nZHFi-DjK-c1fRyi2shfSh4,6302
|
|
24
24
|
sciv/preprocessing/_gencode_.py,sha256=HKhRgK76khGepdv4FaKiOvTys1uJTbvIyrKUta5W0K8,2108
|
|
25
25
|
sciv/preprocessing/_gsea_.py,sha256=AH_PpUNfMN7WkF0pqAuUhEC6qZwKrtQm6VaaYu6JLfI,3803
|
|
26
|
-
sciv/preprocessing/_scanpy_.py,sha256=
|
|
27
|
-
sciv/preprocessing/_scvi_.py,sha256=
|
|
26
|
+
sciv/preprocessing/_scanpy_.py,sha256=tB8BD2wpLAU8_YxdqrgNtcjpNXNRo-JCdm2lxaKDBLc,11611
|
|
27
|
+
sciv/preprocessing/_scvi_.py,sha256=7QxwPA2kR_g15X28aEak7AFA4kyQ-UbtpiLH-rc5Ksg,10780
|
|
28
28
|
sciv/preprocessing/_snapatac_.py,sha256=Dq8CHF7Psl3CQszaEokQYO56Oe2uzyWOy_cGlaOywfc,27798
|
|
29
29
|
sciv/tool/__init__.py,sha256=WXzHkWt6RgBC3qqD-98nR5wQmt6oC850ox_VpMrapSU,2468
|
|
30
|
-
sciv/tool/_algorithm_.py,sha256=
|
|
31
|
-
sciv/tool/_matrix_.py,sha256=
|
|
32
|
-
sciv/tool/_random_walk_.py,sha256=
|
|
30
|
+
sciv/tool/_algorithm_.py,sha256=uzA__knaIgIMcnFK-JwKT4sIbCETh94eYH-2WB_BpXc,52728
|
|
31
|
+
sciv/tool/_matrix_.py,sha256=SnC3sXic_ufuEXStcD_HncvYH6apBdNK6nhG6jFLmjA,24324
|
|
32
|
+
sciv/tool/_random_walk_.py,sha256=JOB97XLxlZYHvlIST1wlXgA0mw6fybkWnJGq6X_kbsk,48871
|
|
33
33
|
sciv/util/__init__.py,sha256=nOxZ8if27X7AUJ6hZwTwxOJwIBJb0obWlHjqCzjg_Gc,1964
|
|
34
34
|
sciv/util/_constant_.py,sha256=w0wKQd8guLd1ZTW24_5aECrWsIWDiNQmEpLsWlHar1A,3000
|
|
35
35
|
sciv/util/_core_.py,sha256=hF33ybPcoVlapZsm-2Etem-p_rQUqXlsdaQgZv5jD7w,14867
|
|
36
|
-
sciv-0.0.
|
|
37
|
-
sciv-0.0.
|
|
38
|
-
sciv-0.0.
|
|
39
|
-
sciv-0.0.
|
|
36
|
+
sciv-0.0.96.dist-info/METADATA,sha256=xLbV5NRQL7Q3aA25Peb8Idk1PjZ7rM6yXlJsH7VM_OI,3465
|
|
37
|
+
sciv-0.0.96.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
38
|
+
sciv-0.0.96.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
|
|
39
|
+
sciv-0.0.96.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|