sciv 0.0.88__py3-none-any.whl → 0.0.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sciv/tool/_algorithm_.py +67 -49
- sciv/tool/_random_walk_.py +23 -6
- {sciv-0.0.88.dist-info → sciv-0.0.90.dist-info}/METADATA +1 -1
- {sciv-0.0.88.dist-info → sciv-0.0.90.dist-info}/RECORD +6 -6
- {sciv-0.0.88.dist-info → sciv-0.0.90.dist-info}/WHEEL +0 -0
- {sciv-0.0.88.dist-info → sciv-0.0.90.dist-info}/licenses/LICENSE +0 -0
sciv/tool/_algorithm_.py
CHANGED
|
@@ -814,7 +814,9 @@ def _overlap_(regions_sort: DataFrame, variants: DataFrame) -> DataFrame:
|
|
|
814
814
|
|
|
815
815
|
variants_overlap_info_list: list = []
|
|
816
816
|
|
|
817
|
-
for index, chr_a, start, end in zip(regions_sort["index"],
|
|
817
|
+
for index, chr_a, start, end in zip(regions_sort["index"],
|
|
818
|
+
regions_sort["chr"],
|
|
819
|
+
regions_sort["start"],
|
|
818
820
|
regions_sort["end"]):
|
|
819
821
|
|
|
820
822
|
# judge chr
|
|
@@ -902,73 +904,89 @@ def overlap_sum(regions: AnnData, variants: dict, trait_info: DataFrame) -> AnnD
|
|
|
902
904
|
"""
|
|
903
905
|
|
|
904
906
|
# Unique feature set
|
|
905
|
-
label_all =
|
|
907
|
+
label_all = regions.var.index.tolist()
|
|
906
908
|
# Peak number
|
|
907
909
|
label_all_size: int = len(label_all)
|
|
908
910
|
|
|
909
|
-
#
|
|
910
|
-
|
|
911
|
+
# Pre-build a dict of peak indices for O(1) lookup
|
|
912
|
+
label2idx = {lb: i for i, lb in enumerate(label_all)}
|
|
911
913
|
|
|
912
|
-
|
|
914
|
+
trait_names = trait_info["id"].tolist()
|
|
915
|
+
n_trait = len(trait_names)
|
|
916
|
+
# Pre-allocate sparse matrix, fill column by column, then convert to csc and then csr for efficiency
|
|
917
|
+
row_indices, col_indices, data_vals = [], [], []
|
|
913
918
|
|
|
914
|
-
|
|
919
|
+
# Check column existence once
|
|
920
|
+
required = {"chr", "start", "end"}
|
|
915
921
|
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
if "chr" not in regions_columns or "start" not in regions_columns or "end" not in regions_columns:
|
|
922
|
+
if not required.issubset(regions.var.columns):
|
|
919
923
|
ul.log(__name__).error(
|
|
920
|
-
f"The peaks information {
|
|
921
|
-
f"`end`. (It is recommended to use the `read_sc_atac` method.)"
|
|
924
|
+
f"The peaks information {regions.var.columns} in data `adata` must include three columns: `chr`, `start` "
|
|
925
|
+
f"and `end`. (It is recommended to use the `read_sc_atac` method.)"
|
|
922
926
|
)
|
|
923
927
|
raise ValueError(
|
|
924
|
-
f"The peaks information {
|
|
925
|
-
f"`end`. (It is recommended to use the `read_sc_atac` method.)"
|
|
928
|
+
f"The peaks information {regions.var.columns} in data `adata` must include three columns: `chr`, `start` "
|
|
929
|
+
f"and `end`. (It is recommended to use the `read_sc_atac` method.)"
|
|
926
930
|
)
|
|
927
931
|
|
|
928
|
-
regions_df =
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
+
regions_df = (
|
|
933
|
+
regions.var
|
|
934
|
+
.reset_index()
|
|
935
|
+
.loc[:, ["index", "chr", "start", "end"]]
|
|
936
|
+
.sort_values(["chr", "start", "end"])
|
|
937
|
+
)
|
|
932
938
|
|
|
933
|
-
ul.log(__name__).info(
|
|
934
|
-
for trait_name in tqdm(trait_names):
|
|
939
|
+
ul.log(__name__).info("Obtain peak-trait/disease matrix. (overlap variant information)")
|
|
935
940
|
|
|
941
|
+
# The outer loop can be further accelerated by parallelizing over traits; here we keep it single-threaded for now.
|
|
942
|
+
for col_idx, trait_name in enumerate(tqdm(trait_names)):
|
|
936
943
|
variant: AnnData = variants[trait_name]
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
# handle overlap data
|
|
940
|
-
overlap_info: DataFrame = _overlap_(regions_df, variant.obs)
|
|
944
|
+
overlap_df: DataFrame = _overlap_(regions_df, variant.obs)
|
|
941
945
|
|
|
942
|
-
if
|
|
946
|
+
if overlap_df.empty:
|
|
943
947
|
continue
|
|
944
948
|
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
949
|
+
# Sum at once: first group by label and collect variant_id into a list
|
|
950
|
+
label_var_ids = (
|
|
951
|
+
overlap_df
|
|
952
|
+
.groupby("index")["variant_id"]
|
|
953
|
+
.apply(list)
|
|
954
|
+
.reset_index()
|
|
950
955
|
)
|
|
951
956
|
|
|
952
|
-
#
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
957
|
+
# Traverse each label, sum once for each variant_id list
|
|
958
|
+
for _, row in label_var_ids.iterrows():
|
|
959
|
+
label = row["index"]
|
|
960
|
+
row_idx = label2idx[label]
|
|
961
|
+
var_ids = row["variant_id"]
|
|
962
|
+
# Sum once for all variant_ids in the list, avoiding row-by-row slicing
|
|
963
|
+
matrix_sum = variant[var_ids, :].X.sum(axis=0)
|
|
964
|
+
|
|
965
|
+
if np.isscalar(matrix_sum):
|
|
966
|
+
matrix_sum = np.asarray(matrix_sum).reshape(1)
|
|
967
|
+
|
|
968
|
+
# Collect non-zero values
|
|
969
|
+
if matrix_sum.size == 1:
|
|
970
|
+
val = float(matrix_sum)
|
|
971
|
+
if val != 0:
|
|
972
|
+
row_indices.append(row_idx)
|
|
973
|
+
col_indices.append(col_idx)
|
|
974
|
+
data_vals.append(val)
|
|
975
|
+
else:
|
|
976
|
+
for t_idx, v in enumerate(matrix_sum):
|
|
977
|
+
if v != 0:
|
|
978
|
+
row_indices.append(row_idx)
|
|
979
|
+
col_indices.append(col_idx + t_idx)
|
|
980
|
+
data_vals.append(float(v))
|
|
981
|
+
|
|
982
|
+
# Build sparse matrix, then convert to csr format
|
|
983
|
+
overlap_sparse = sparse.csc_matrix(
|
|
984
|
+
(data_vals, (row_indices, col_indices)),
|
|
985
|
+
shape=(label_all_size, n_trait),
|
|
986
|
+
dtype=np.float32
|
|
987
|
+
).tocsr()
|
|
988
|
+
|
|
989
|
+
overlap_adata = AnnData(overlap_sparse, var=trait_info, obs=regions.var)
|
|
972
990
|
overlap_adata.uns["is_overlap"] = True
|
|
973
991
|
return overlap_adata
|
|
974
992
|
|
sciv/tool/_random_walk_.py
CHANGED
|
@@ -208,7 +208,19 @@ def random_walk(
|
|
|
208
208
|
|
|
209
209
|
return np.column_stack(results)
|
|
210
210
|
elif device == 'gpu' or (device == 'auto' and availability):
|
|
211
|
-
|
|
211
|
+
|
|
212
|
+
try:
|
|
213
|
+
return _random_walk_gpu_(seed_cell_weight, weight, gamma, epsilon, p, device='gpu')
|
|
214
|
+
except RuntimeError as e:
|
|
215
|
+
ul.log(__name__).warning(f"GPU failed to run, try to switch to CPU running.\n {e}")
|
|
216
|
+
sample_count = seed_cell_weight.shape[1]
|
|
217
|
+
|
|
218
|
+
results = Parallel(n_jobs=n_jobs)(
|
|
219
|
+
delayed(_random_walk_cpu_)(seed_cell_weight[:, i], weight, gamma, epsilon, p)
|
|
220
|
+
for i in tqdm(range(sample_count))
|
|
221
|
+
)
|
|
222
|
+
|
|
223
|
+
return np.column_stack(results)
|
|
212
224
|
else:
|
|
213
225
|
ul.log(__name__).error(f'The `device` ({device}) is not supported. Only supports "cpu", "gpu", and "auto" values.')
|
|
214
226
|
raise ValueError(f'The `device` ({device}) is not supported. Only supports "cpu", "gpu", and "auto" values.')
|
|
@@ -677,7 +689,7 @@ class RandomWalk:
|
|
|
677
689
|
seed_cell_matrix_en[:, i] = seed_cell_en_value / (1 if seed_cell_en_value.sum() == 0 else seed_cell_en_value.sum())
|
|
678
690
|
|
|
679
691
|
# Parallel processing of all traits and real-time display of progress
|
|
680
|
-
Parallel(n_jobs
|
|
692
|
+
Parallel(n_jobs=self.n_jobs, backend='threading')(
|
|
681
693
|
delayed(_process_single_trait)(i) for i in tqdm(self.trait_range, desc="Obtain progress of seed cells with weights")
|
|
682
694
|
)
|
|
683
695
|
|
|
@@ -774,6 +786,7 @@ class RandomWalk:
|
|
|
774
786
|
|
|
775
787
|
score = self._random_walk_core_(seed_cell_data, weight=weight)
|
|
776
788
|
|
|
789
|
+
ul.log(__name__).info("Normalize the results")
|
|
777
790
|
cell_value = self.scale_norm(score)
|
|
778
791
|
|
|
779
792
|
if _layer_label_ == "trs":
|
|
@@ -904,8 +917,8 @@ class RandomWalk:
|
|
|
904
917
|
trs_score = to_dense(self.trs_adata.X if label == "run_en" else self.trs_adata.layers[_trs_layer_label_], is_array=True)
|
|
905
918
|
|
|
906
919
|
# Initialize enriched container
|
|
907
|
-
trait_cell_enrichment = np.zeros(self.trs_adata.shape)
|
|
908
|
-
trait_cell_credible = np.zeros(self.trs_adata.shape)
|
|
920
|
+
trait_cell_enrichment = np.zeros(self.trs_adata.shape).astype(int)
|
|
921
|
+
trait_cell_credible = np.zeros(self.trs_adata.shape).astype(np.float32)
|
|
909
922
|
|
|
910
923
|
ul.log(__name__).info(f"Calculate {len(self.trait_list)} traits/diseases for process `{label}`. (Enrichment-random walk)")
|
|
911
924
|
# Random walk
|
|
@@ -916,8 +929,9 @@ class RandomWalk:
|
|
|
916
929
|
)
|
|
917
930
|
|
|
918
931
|
ul.log(__name__).info(f"Calculate {len(self.trait_list)} traits/diseases for process `{label}`. (Enrichment-score)")
|
|
919
|
-
for i in tqdm(self.trait_range):
|
|
920
932
|
|
|
933
|
+
# Process each trait in parallel
|
|
934
|
+
def _process_trait(i):
|
|
921
935
|
# Random walk
|
|
922
936
|
cell_value = cell_value_data[:, i]
|
|
923
937
|
|
|
@@ -940,7 +954,10 @@ class RandomWalk:
|
|
|
940
954
|
trait_cell_enrichment[:, i][cell_value_credible > self.credible_threshold] = 1
|
|
941
955
|
trait_cell_credible[:, i] = cell_value_credible
|
|
942
956
|
|
|
943
|
-
|
|
957
|
+
# Process each trait in parallel, backend='threading' can effectively prevent the read-only parameter issue caused by copying in loky multi-process mode
|
|
958
|
+
Parallel(n_jobs=self.n_jobs, backend='threading')(delayed(_process_trait)(i) for i in tqdm(self.trait_range))
|
|
959
|
+
|
|
960
|
+
self.trs_adata.layers[_layer_label_] = to_sparse(trait_cell_enrichment)
|
|
944
961
|
|
|
945
962
|
if not self.is_simple:
|
|
946
963
|
self.trs_adata.layers[f"credible_{_layer_label_}"] = to_sparse(trait_cell_credible)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sciv
|
|
3
|
-
Version: 0.0.
|
|
3
|
+
Version: 0.0.90
|
|
4
4
|
Summary: Unveiling the pivotal cell types involved in variant function regulation at a single-cell resolution
|
|
5
5
|
Project-URL: github, https://github.com/YuZhengM/sciv
|
|
6
6
|
Author-email: Zheng-Min Yu <yuzmbio@163.com>
|
|
@@ -27,13 +27,13 @@ sciv/preprocessing/_scanpy_.py,sha256=flC10W5YPgJE5Ccxt5hP8qW3Q3cOi8DZ36Z1j9D3oN
|
|
|
27
27
|
sciv/preprocessing/_scvi_.py,sha256=ZIDkQ_4deYmzSMiAbu5C3j_jMMl7hBTFLCBXHCNj3B4,10332
|
|
28
28
|
sciv/preprocessing/_snapatac_.py,sha256=Dq8CHF7Psl3CQszaEokQYO56Oe2uzyWOy_cGlaOywfc,27798
|
|
29
29
|
sciv/tool/__init__.py,sha256=WXzHkWt6RgBC3qqD-98nR5wQmt6oC850ox_VpMrapSU,2468
|
|
30
|
-
sciv/tool/_algorithm_.py,sha256=
|
|
30
|
+
sciv/tool/_algorithm_.py,sha256=yTImeGMWK6Y2gxygR90bqRF1vkvU857l5M9A_Q_VoZI,49839
|
|
31
31
|
sciv/tool/_matrix_.py,sha256=O1EAhA9wxh06P_eOxEBesK7kO7IExKlhH6uJzGh1HBM,24322
|
|
32
|
-
sciv/tool/_random_walk_.py,sha256=
|
|
32
|
+
sciv/tool/_random_walk_.py,sha256=lnMiJyDuqDB75l9IkFLr7bIgKYhWxlBdIcExSErpQNw,48374
|
|
33
33
|
sciv/util/__init__.py,sha256=nOxZ8if27X7AUJ6hZwTwxOJwIBJb0obWlHjqCzjg_Gc,1964
|
|
34
34
|
sciv/util/_constant_.py,sha256=w0wKQd8guLd1ZTW24_5aECrWsIWDiNQmEpLsWlHar1A,3000
|
|
35
35
|
sciv/util/_core_.py,sha256=ZD2uSnEBHVu0i9TmXWzri_3bXZzYKnIZk818gW3zadE,14751
|
|
36
|
-
sciv-0.0.
|
|
37
|
-
sciv-0.0.
|
|
38
|
-
sciv-0.0.
|
|
39
|
-
sciv-0.0.
|
|
36
|
+
sciv-0.0.90.dist-info/METADATA,sha256=0yOyKQwEYMys8J6KodKgk2FORBN2SZQhRmiTF5je2FI,3465
|
|
37
|
+
sciv-0.0.90.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
38
|
+
sciv-0.0.90.dist-info/licenses/LICENSE,sha256=4UvHVf3qCOZjHLs4LkYz8u96XRpXnZrpTKrkUQPs5_A,1075
|
|
39
|
+
sciv-0.0.90.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|