gsMap 1.67__py3-none-any.whl → 1.71__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/{GNN_VAE → GNN}/__init__.py +0 -0
- gsMap/{GNN_VAE → GNN}/adjacency_matrix.py +75 -75
- gsMap/{GNN_VAE → GNN}/model.py +89 -89
- gsMap/{GNN_VAE → GNN}/train.py +88 -86
- gsMap/__init__.py +5 -5
- gsMap/__main__.py +2 -2
- gsMap/cauchy_combination_test.py +141 -141
- gsMap/config.py +805 -803
- gsMap/diagnosis.py +273 -273
- gsMap/find_latent_representation.py +133 -145
- gsMap/format_sumstats.py +407 -407
- gsMap/generate_ldscore.py +618 -618
- gsMap/latent_to_gene.py +234 -234
- gsMap/main.py +31 -31
- gsMap/report.py +160 -160
- gsMap/run_all_mode.py +194 -194
- gsMap/setup.py +0 -0
- gsMap/spatial_ldsc_multiple_sumstats.py +380 -380
- gsMap/templates/report_template.html +198 -198
- gsMap/utils/__init__.py +0 -0
- gsMap/utils/generate_r2_matrix.py +735 -735
- gsMap/utils/jackknife.py +514 -514
- gsMap/utils/make_annotations.py +518 -518
- gsMap/utils/manhattan_plot.py +639 -639
- gsMap/utils/regression_read.py +294 -294
- gsMap/visualize.py +198 -198
- {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/LICENSE +21 -21
- {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/METADATA +28 -22
- gsmap-1.71.dist-info/RECORD +31 -0
- gsmap-1.67.dist-info/RECORD +0 -31
- {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/WHEEL +0 -0
- {gsmap-1.67.dist-info → gsmap-1.71.dist-info}/entry_points.txt +0 -0
@@ -1,145 +1,133 @@
|
|
1
|
-
import logging
|
2
|
-
import random
|
3
|
-
import numpy as np
|
4
|
-
import scanpy as sc
|
5
|
-
import torch
|
6
|
-
from sklearn.decomposition import PCA
|
7
|
-
from sklearn.preprocessing import LabelEncoder
|
8
|
-
from gsMap.
|
9
|
-
from gsMap.
|
10
|
-
from gsMap.config import FindLatentRepresentationsConfig
|
11
|
-
|
12
|
-
logger = logging.getLogger(__name__)
|
13
|
-
|
14
|
-
|
15
|
-
def set_seed(seed_value):
    """
    Seed the random number generators for reproducibility.

    Seeds PyTorch, NumPy and Python's ``random`` module with ``seed_value``.
    When CUDA is available the CUDA generators are seeded as well. The
    detected compute device is reported through the module logger.
    """
    for seeder in (torch.manual_seed, np.random.seed, random.seed):
        seeder(seed_value)

    if not torch.cuda.is_available():
        logger.info('Using CPU for computations.')
        return

    logger.info('Using GPU for computations.')
    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
|
28
|
-
|
29
|
-
def preprocess_data(adata, params):
|
30
|
-
"""
|
31
|
-
Preprocess the AnnData
|
32
|
-
"""
|
33
|
-
logger.info('Preprocessing data...')
|
34
|
-
adata.var_names_make_unique()
|
35
|
-
|
36
|
-
sc.pp.filter_genes(adata, min_cells=30)
|
37
|
-
if params.data_layer in adata.layers.keys():
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
sc.pp.
|
46
|
-
|
47
|
-
|
48
|
-
sc.pp.
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
)
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
def
|
67
|
-
self.
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
#
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
adata.obsm["latent_GVAE"] = latent_gvae
|
135
|
-
adata.obsm["latent_PCA"] = latent_pca
|
136
|
-
|
137
|
-
# Run UMAP based on latent representations
|
138
|
-
for name in ['latent_GVAE', 'latent_PCA']:
|
139
|
-
sc.pp.neighbors(adata, n_neighbors=10, use_rep=name)
|
140
|
-
sc.tl.umap(adata)
|
141
|
-
adata.obsm['X_umap_' + name] = adata.obsm['X_umap']
|
142
|
-
|
143
|
-
# Save the AnnData object
|
144
|
-
logger.info('Saving ST data...')
|
145
|
-
adata.write(args.hdf5_with_latent_path)
|
1
|
+
import logging
|
2
|
+
import random
|
3
|
+
import numpy as np
|
4
|
+
import scanpy as sc
|
5
|
+
import torch
|
6
|
+
from sklearn.decomposition import PCA
|
7
|
+
from sklearn.preprocessing import LabelEncoder
|
8
|
+
from gsMap.GNN.adjacency_matrix import construct_adjacency_matrix
|
9
|
+
from gsMap.GNN.train import ModelTrainer
|
10
|
+
from gsMap.config import FindLatentRepresentationsConfig
|
11
|
+
|
12
|
+
logger = logging.getLogger(__name__)
|
13
|
+
|
14
|
+
|
15
|
+
def set_seed(seed_value):
    """
    Seed the random number generators for reproducibility.

    Seeds PyTorch, NumPy and Python's ``random`` module with ``seed_value``.
    When CUDA is available the CUDA generators are seeded as well. The
    detected compute device is reported through the module logger.
    """
    for seeder in (torch.manual_seed, np.random.seed, random.seed):
        seeder(seed_value)

    if not torch.cuda.is_available():
        logger.info('Using CPU for computations.')
        return

    logger.info('Using GPU for computations.')
    torch.cuda.manual_seed(seed_value)
    torch.cuda.manual_seed_all(seed_value)
|
28
|
+
|
29
|
+
def preprocess_data(adata, params):
    """
    Preprocess the AnnData object before latent representations are learned.

    Steps, in order:
      1. make variable names unique;
      2. filter genes with ``min_cells=30``;
      3. copy the layer named by ``params.data_layer`` into ``adata.X``;
      4. flag highly variable genes (``n_top_genes=params.feat_cell``).

    For a ``'count'`` / ``'counts'`` layer the ``seurat_v3`` flavor is run on
    the counts, after which the data are total-normalized (target sum 1e4)
    and log1p-transformed. Any other layer uses the ``seurat`` flavor and is
    not transformed further.

    Parameters
    ----------
    adata : AnnData-like object; modified in place.
    params : configuration object providing ``data_layer`` and ``feat_cell``.

    Returns
    -------
    The same ``adata`` object.

    Raises
    ------
    ValueError
        If ``params.data_layer`` is not a key of ``adata.layers``.
    """
    logger.info('Preprocessing data...')
    adata.var_names_make_unique()
    sc.pp.filter_genes(adata, min_cells=30)

    # Guard clause: the requested layer must exist before anything else runs.
    layer_name = params.data_layer
    if layer_name not in adata.layers.keys():
        raise ValueError(f'Invalid data layer: {params.data_layer}, please check the input data.')

    logger.info(f'Using data layer: {params.data_layer}...')
    adata.X = adata.layers[layer_name]

    # Count layers select HVGs on the raw counts; every other layer takes
    # the "seurat" flavor.
    is_count_layer = layer_name in ['count', 'counts']
    hvg_flavor = "seurat_v3" if is_count_layer else "seurat"
    sc.pp.highly_variable_genes(adata, flavor=hvg_flavor, n_top_genes=params.feat_cell)

    if is_count_layer:
        # Normalize the data
        sc.pp.normalize_total(adata, target_sum=1e4)
        sc.pp.log1p(adata)

    return adata
|
54
|
+
|
55
|
+
|
56
|
+
class LatentRepresentationFinder:
    """
    Prepare model inputs from an AnnData object and derive latent representations.

    Attributes set by ``__init__``:
      * ``params`` - the configuration object passed in;
      * ``expression_array`` - expression of the highly variable genes,
        scaled with ``max_value=10``;
      * ``graph_dict`` - result of ``construct_adjacency_matrix``.

    ``latent_pca`` only exists after ``compute_pca()`` has been called.
    ``graph_dict`` is deleted at the end of ``run_gnn_vae()``, so that method
    can be run once per instance.
    """

    def __init__(self, adata, args: FindLatentRepresentationsConfig):
        self.params = args

        # Keep only the highly variable genes, then scale them.
        hvg_matrix = adata[:, adata.var.highly_variable].X.copy()
        self.expression_array = sc.pp.scale(hvg_matrix, max_value=10)

        # Construct the neighboring graph
        self.graph_dict = construct_adjacency_matrix(adata, self.params)

    def compute_pca(self):
        """Fit a PCA with ``params.n_comps`` components, store it as ``latent_pca`` and return it."""
        reducer = PCA(n_components=self.params.n_comps)
        self.latent_pca = reducer.fit_transform(self.expression_array)
        return self.latent_pca

    def run_gnn_vae(self, label, verbose='whole ST data'):
        """
        Train the model and return its latent representation.

        The node features are the PCA output when ``params.input_pca`` is
        truthy, otherwise the scaled expression array. ``params.n_nodes`` and
        ``params.feat_cell`` are overwritten with the feature matrix shape
        before training.

        ``label`` is forwarded to ``ModelTrainer``; ``verbose`` is only used
        in the log message.
        """
        # Use PCA if specified
        node_X = self.compute_pca() if self.params.input_pca else self.expression_array

        # Update the input shape
        n_rows, n_cols = node_X.shape[0], node_X.shape[1]
        self.params.n_nodes = n_rows
        self.params.feat_cell = n_cols

        # Run GNN
        logger.info(f'Finding latent representations for {verbose}...')
        trainer = ModelTrainer(node_X, self.graph_dict, self.params, label)
        trainer.run_train()

        # Drop this instance's reference to the graph once training is done.
        del self.graph_dict

        return trainer.get_latent()
|
90
|
+
|
91
|
+
|
92
|
+
def run_find_latent_representation(args: FindLatentRepresentationsConfig):
    """
    Load the ST data, learn latent representations and save the result.

    Reads the h5ad file at ``args.input_hdf5_path``, optionally filters cells
    by ``args.annotation``, preprocesses the data, trains the model, and
    writes the AnnData object with ``obsm['latent_GVAE']`` and
    ``obsm['latent_PCA']`` to ``args.hdf5_with_latent_path``.

    Parameters
    ----------
    args : FindLatentRepresentationsConfig
        Configuration; this function reads ``sample_name``,
        ``input_hdf5_path``, ``annotation`` and ``hdf5_with_latent_path``
        and passes the object on to preprocessing and model training.

    Raises
    ------
    ValueError
        Propagated from ``preprocess_data`` when the configured data layer
        is missing.
    """
    set_seed(2024)

    # Load the ST data
    logger.info(f'Loading ST data of {args.sample_name}...')
    adata = sc.read_h5ad(args.input_hdf5_path)
    logger.info(f'The ST data contains {adata.shape[0]} cells, {adata.shape[1]} genes.')

    # Load the cell type annotation
    if args.annotation is not None:
        # Remove cells with a missing annotation, then drop annotation
        # groups that have fewer than 30 cells.
        adata = adata[~adata.obs[args.annotation].isnull()]
        num = adata.obs[args.annotation].value_counts()
        valid_annotations = num[num >= 30].index.to_list()
        adata = adata[adata.obs[args.annotation].isin(valid_annotations)]

        le = LabelEncoder()
        label = le.fit_transform(adata.obs[args.annotation])
    else:
        label = None

    # Preprocess data
    adata = preprocess_data(adata, args)

    latent_rep = LatentRepresentationFinder(adata, args)
    latent_gvae = latent_rep.run_gnn_vae(label)

    # run_gnn_vae() only computes the PCA when args.input_pca is set, so the
    # attribute may not exist yet; compute it on demand instead of failing
    # with AttributeError.
    latent_pca = getattr(latent_rep, 'latent_pca', None)
    if latent_pca is None:
        latent_pca = latent_rep.compute_pca()

    # Add latent representations to the AnnData object
    logger.info('Adding latent representations...')
    adata.obsm["latent_GVAE"] = latent_gvae
    adata.obsm["latent_PCA"] = latent_pca

    # NOTE: the UMAP step on the latent representations is currently disabled.
    # for name in ['latent_GVAE', 'latent_PCA']:
    #     sc.pp.neighbors(adata, n_neighbors=10, use_rep=name)
    #     sc.tl.umap(adata)
    #     adata.obsm['X_umap_' + name] = adata.obsm['X_umap']

    # Save the AnnData object
    logger.info('Saving ST data...')
    adata.write(args.hdf5_with_latent_path)
|