gsMap 1.71-py3-none-any.whl → 1.71.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/model.py +2 -1
- gsMap/__init__.py +1 -1
- gsMap/config.py +1 -0
- gsMap/diagnosis.py +12 -11
- gsMap/find_latent_representation.py +8 -2
- gsMap/latent_to_gene.py +38 -20
- gsMap/spatial_ldsc_multiple_sumstats.py +161 -181
- {gsmap-1.71.dist-info → gsmap-1.71.2.dist-info}/METADATA +4 -4
- {gsmap-1.71.dist-info → gsmap-1.71.2.dist-info}/RECORD +12 -12
- {gsmap-1.71.dist-info → gsmap-1.71.2.dist-info}/WHEEL +1 -1
- {gsmap-1.71.dist-info → gsmap-1.71.2.dist-info}/LICENSE +0 -0
- {gsmap-1.71.dist-info → gsmap-1.71.2.dist-info}/entry_points.txt +0 -0
gsMap/GNN/model.py
CHANGED
@@ -85,5 +85,6 @@ class GATModel(nn.Module):
         mu, logvar = self.encode(x, edge_index)
         z = self.reparameterize(mu, logvar)
         x_reconstructed = self.decoder(z)
-        pred_label = F.softmax(self.cluster(z), dim=1)
+        # pred_label = F.softmax(self.cluster(z), dim=1)
+        pred_label = self.cluster(z)
         return pred_label, x_reconstructed, z, mu, logvar
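The functional change here is that the cluster head now returns raw logits rather than softmax probabilities. The diff does not state the motivation, but a common reason for this pattern is that a downstream loss such as PyTorch's nn.CrossEntropyLoss applies log-softmax internally, so passing pre-softmaxed values would normalise twice and flatten the gradients. A minimal, self-contained sketch of that distinction on toy tensors (not gsMap's model or training code):

    # Toy comparison: CrossEntropyLoss expects unnormalised logits.
    import torch
    import torch.nn as nn

    logits = torch.randn(4, 3)                 # e.g. cluster-head output for 4 cells, 3 clusters
    labels = torch.tensor([0, 2, 1, 0])
    loss_fn = nn.CrossEntropyLoss()

    loss_from_logits = loss_fn(logits, labels)                # intended usage
    loss_from_probs = loss_fn(logits.softmax(dim=1), labels)  # double softmax: distorted loss and gradients
    print(loss_from_logits.item(), loss_from_probs.item())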
gsMap/__init__.py
CHANGED
gsMap/config.py
CHANGED
gsMap/diagnosis.py
CHANGED
@@ -85,17 +85,18 @@ def compute_gene_diagnostic_info(config: DiagnosisConfig):
     gene_diagnostic_info.to_csv(gene_diagnostic_info_save_path, index=False)
     logger.info(f'Gene diagnostic information saved to {gene_diagnostic_info_save_path}.')

-    #
-
-
-
-    adata.var[f'{config.trait_name}
-    adata.var[f'{config.trait_name}
-
-    #
-
-    adata.obs
-    adata.
+    # TODO: A new script is needed to save the gene diagnostic info to adata.var and trait_ldsc_result to adata.obs when running multiple traits
+    # # Save to adata.var with the trait_name prefix
+    # logger.info('Saving gene diagnostic info to adata.var...')
+    # gene_diagnostic_info.set_index('Gene', inplace=True) # Use 'Gene' as the index to align with adata.var
+    # adata.var[f'{config.trait_name}_Annotation'] = gene_diagnostic_info['Annotation']
+    # adata.var[f'{config.trait_name}_Median_GSS'] = gene_diagnostic_info['Median_GSS']
+    # adata.var[f'{config.trait_name}_PCC'] = gene_diagnostic_info['PCC']
+    #
+    # # Save trait_ldsc_result to adata.obs
+    # logger.info(f'Saving trait LDSC results to adata.obs as gsMap_{config.trait_name}_p_value...')
+    # adata.obs[f'gsMap_{config.trait_name}_p_value'] = trait_ldsc_result['p']
+    # adata.write(config.hdf5_with_latent_path, )

     return gene_diagnostic_info.reset_index()

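With the write-back block commented out (pending the TODO above), gene diagnostic information is now only persisted to the CSV. A hedged, user-side sketch of how the same columns could still be attached to the AnnData object manually, following the commented-out logic; the file paths and trait name below are placeholders, not values shipped with gsMap:

    import pandas as pd
    import scanpy as sc

    adata = sc.read_h5ad('path/to/sample_hdf5_with_latent.h5ad')                     # placeholder path
    gene_diag = pd.read_csv('path/to/sample_gene_diagnostic_info.csv').set_index('Gene')
    trait_name = 'HEIGHT'                                                            # placeholder trait name

    adata.var[f'{trait_name}_Annotation'] = gene_diag['Annotation']
    adata.var[f'{trait_name}_Median_GSS'] = gene_diag['Median_GSS']
    adata.var[f'{trait_name}_PCC'] = gene_diag['PCC']
    adata.write('path/to/sample_hdf5_with_latent.h5ad')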
gsMap/find_latent_representation.py
CHANGED
@@ -33,15 +33,21 @@ def preprocess_data(adata, params):
     logger.info('Preprocessing data...')
     adata.var_names_make_unique()

-    sc.pp.filter_genes(adata, min_cells=30)
     if params.data_layer in adata.layers.keys():
         logger.info(f'Using data layer: {params.data_layer}...')
         adata.X = adata.layers[params.data_layer]
+        sc.pp.filter_genes(adata, min_cells=30)
+    elif params.data_layer == 'X':
+        logger.info(f'Using data layer: {params.data_layer}...')
+        if adata.X.dtype == 'float32' or adata.X.dtype == 'float64':
+            logger.warning(f'The data layer should be raw count data')
+        sc.pp.filter_genes(adata, min_cells=30)
     else:
         raise ValueError(f'Invalid data layer: {params.data_layer}, please check the input data.')

-    if params.data_layer in ['count', 'counts']:
+    if params.data_layer in ['count', 'counts', 'X']:
         # HVGs based on count
+        logger.info('Dealing with count data...')
         sc.pp.highly_variable_genes(adata,flavor="seurat_v3",n_top_genes=params.feat_cell)
         # Normalize the data
         sc.pp.normalize_total(adata, target_sum=1e4)
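The new 'X' branch allows adata.X itself to be used as the expression source, warning (but not failing) when it is stored as float32/float64, since that often indicates already-normalised values. The warning matters because the HVG step above uses flavor="seurat_v3", which expects raw counts. A small self-contained illustration on toy Poisson counts (the sizes and data are made up; scikit-misc must be installed for the seurat_v3 flavor):

    import numpy as np
    import anndata as ad
    import scanpy as sc

    # Integer counts stored as float32 would trigger the warning above even though they are raw counts.
    counts = np.random.poisson(1.0, size=(200, 100)).astype(np.float32)
    adata = ad.AnnData(X=counts)

    sc.pp.filter_genes(adata, min_cells=30)
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=50)
    print(adata.var['highly_variable'].sum())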
gsMap/latent_to_gene.py
CHANGED
@@ -4,6 +4,7 @@ from pathlib import Path
 import numpy as np
 import pandas as pd
 import scanpy as sc
+import scipy
 from scipy.stats import gmean
 from scipy.stats import rankdata
 from sklearn.metrics.pairwise import cosine_similarity
@@ -62,7 +63,7 @@ def build_spatial_net(adata, annotation, num_neighbour):
         logger.info(f'Cell annotations are not provided...')
         spatial_net = find_neighbors(coor, num_neighbour)

-    return spatial_net
+    return spatial_net.groupby('Cell1')['Cell2'].apply(np.array).to_dict()


 def find_neighbors_regional(cell_pos, spatial_net_dict, coor_latent, config, cell_annotations):
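build_spatial_net now returns a plain dict mapping each cell to an array of its spatial neighbours instead of the raw edge-list DataFrame, so downstream per-cell lookups become dictionary accesses rather than repeated DataFrame filtering. A toy illustration of what that conversion produces (the small edge list is made up):

    import numpy as np
    import pandas as pd

    spatial_net = pd.DataFrame({'Cell1': [0, 0, 1, 2], 'Cell2': [1, 2, 0, 0]})
    spatial_net_dict = spatial_net.groupby('Cell1')['Cell2'].apply(np.array).to_dict()
    print(spatial_net_dict)   # {0: array([1, 2]), 1: array([0]), 2: array([0])}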
@@ -128,18 +129,20 @@ def compute_regional_mkscore(cell_pos, spatial_net_dict, coor_latent, config, ce
 def run_latent_to_gene(config: LatentToGeneConfig):
     logger.info('------Loading the spatial data...')
     adata = sc.read_h5ad(config.hdf5_with_latent_path)
+    logger.info(f'Loaded spatial data with {adata.n_obs} cells and {adata.n_vars} genes.')

     if config.annotation is not None:
         logger.info(f'------Cell annotations are provided as {config.annotation}...')
+        initial_cell_count = adata.n_obs
         adata = adata[~pd.isnull(adata.obs[config.annotation]), :]
+        logger.info(f'Removed null annotations. Cells retained: {adata.n_obs} (initial: {initial_cell_count}).')

     # Homologs transformation
     if config.homolog_file is not None:
         logger.info(f'------Transforming the {config.species} to HUMAN_GENE_SYM...')
         homologs = pd.read_csv(config.homolog_file, sep='\t')
         if homologs.shape[1] != 2:
-            raise ValueError(
-                "Homologs file must have two columns: one for the species and one for the human gene symbol.")
+            raise ValueError("Homologs file must have two columns: one for the species and one for the human gene symbol.")

         homologs.columns = [config.species, 'HUMAN_GENE_SYM']
         homologs.set_index(config.species, inplace=True)
@@ -156,35 +159,30 @@ def run_latent_to_gene(config: LatentToGeneConfig):

     if config.annotation is not None:
         cell_annotations = adata.obs[config.annotation].values
+        logger.info(f'Using cell annotations for {len(cell_annotations)} cells.')
     else:
         cell_annotations = None

     # Build the spatial graph
-
-    spatial_net_dict =
+    logger.info('------Building the spatial graph...')
+    spatial_net_dict = build_spatial_net(adata, config.annotation, config.num_neighbour_spatial)
+    logger.info('Spatial graph built successfully.')

     # Extract the latent representation
+    logger.info('------Extracting the latent representation...')
     coor_latent = adata.obsm[config.latent_representation]
     coor_latent = coor_latent.astype(np.float32)
-
-    # Compute ranks
-    logger.info('------Ranking the spatial data...')
-    adata_X = adata.X.tocsr()
-    ranks = np.zeros((n_cells, n_genes), dtype=np.float32)
-
-    for i in tqdm(range(n_cells), desc="Computing ranks per cell"):
-        data = adata_X[i, :].toarray().flatten()
-        ranks[i, :] = rankdata(data, method='average')
+    logger.info('Latent representation extracted.')

     # Geometric mean across slices
+    gM = None
     if config.gM_slices is not None:
         logger.info('Geometrical mean across multiple slices is provided.')
         gM_df = pd.read_parquet(config.gM_slices)
         if config.species is not None:
-            homologs = pd.read_csv(config.homolog_file, sep='\t'
+            homologs = pd.read_csv(config.homolog_file, sep='\t')
             if homologs.shape[1] < 2:
-                raise ValueError(
-                    "Homologs file must have at least two columns: one for the species and one for the human gene symbol.")
+                raise ValueError("Homologs file must have at least two columns: one for the species and one for the human gene symbol.")
             homologs.columns = [config.species, 'HUMAN_GENE_SYM']
             homologs.set_index(config.species, inplace=True)
             gM_df = gM_df.loc[gM_df.index.isin(homologs.index)]
@@ -193,20 +191,36 @@ def run_latent_to_gene(config: LatentToGeneConfig):
         gM_df = gM_df.loc[common_genes]
         gM = gM_df['G_Mean'].values
         adata = adata[:, common_genes]
-
+        logger.info(f'{len(common_genes)} common genes retained after loading the cross slice geometric mean.')
+
+    # Compute ranks after taking common genes with gM_slices
+    logger.info('------Ranking the spatial data...')
+    if not scipy.sparse.issparse(adata.X):
+        adata_X = scipy.sparse.csr_matrix(adata.X)
+    elif isinstance(adata.X, scipy.sparse.csr_matrix):
+        adata_X = adata.X  # Avoid copying if already CSR
     else:
+        adata_X = adata.X.tocsr()
+
+    ranks = np.zeros((n_cells, adata.n_vars), dtype=np.float32)
+
+    for i in tqdm(range(n_cells), desc="Computing ranks per cell"):
+        data = adata_X[i, :].toarray().flatten()
+        ranks[i, :] = rankdata(data, method='average')
+
+    if gM is None:
         gM = gmean(ranks, axis=0)

     # Compute the fraction of each gene across cells
     adata_X_bool = adata_X.astype(bool)
     frac_whole = np.asarray(adata_X_bool.sum(axis=0)).flatten() / n_cells
+    logger.info('Gene expression proportion of each gene across cells computed.')

     # Normalize the ranks
-    ranks
+    ranks /= gM

     # Compute marker scores in parallel
     logger.info('------Computing marker scores...')
-
     def compute_mk_score_wrapper(cell_pos):
         return compute_regional_mkscore(
             cell_pos, spatial_net_dict, coor_latent, config, cell_annotations, ranks, frac_whole, adata_X_bool
@@ -214,12 +228,14 @@ def run_latent_to_gene(config: LatentToGeneConfig):

     mk_scores = [compute_mk_score_wrapper(cell_pos) for cell_pos in tqdm(range(n_cells), desc="Calculating marker scores")]
     mk_score = np.vstack(mk_scores).T
+    logger.info('Marker scores computed.')

     # Remove mitochondrial genes
     gene_names = adata.var_names.values.astype(str)
     mt_gene_mask = ~(np.char.startswith(gene_names, 'MT-') | np.char.startswith(gene_names, 'mt-'))
     mk_score = mk_score[mt_gene_mask, :]
     gene_names = gene_names[mt_gene_mask]
+    logger.info(f'Removed mitochondrial genes. Remaining genes: {len(gene_names)}.')

     # Save the marker scores
     logger.info(f'------Saving marker scores ...')
@@ -229,6 +245,8 @@ def run_latent_to_gene(config: LatentToGeneConfig):
     mk_score_df.reset_index(inplace=True)
     mk_score_df.rename(columns={'index': 'HUMAN_GENE_SYM'}, inplace=True)
     mk_score_df.to_feather(output_file_path)
+    logger.info(f'Marker scores saved to {output_file_path}.')

     # Save the modified adata object to disk
     adata.write(config.hdf5_with_latent_path)
+    logger.info(f'Modified adata object saved to {config.hdf5_with_latent_path}.')
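The ranking step now runs after the optional gM_slices gene subsetting and first coerces adata.X to CSR, so the same rankdata loop works for dense or sparse input; the gene-wise geometric mean gM is only computed from the ranks when no cross-slice gM was supplied, and ranks are then normalised in place with ranks /= gM. A toy, self-contained sketch of that rank and geometric-mean normalisation (the 2x3 matrix is made up):

    import numpy as np
    import scipy.sparse
    from scipy.stats import gmean, rankdata

    X = scipy.sparse.csr_matrix(np.array([[0, 3, 1],
                                          [2, 0, 5]], dtype=np.float32))
    n_cells, n_genes = X.shape

    ranks = np.zeros((n_cells, n_genes), dtype=np.float32)
    for i in range(n_cells):
        ranks[i, :] = rankdata(X[i, :].toarray().flatten(), method='average')

    gM = gmean(ranks, axis=0)   # gene-wise geometric mean of ranks across cells
    ranks /= gM                 # same normalisation as in the diff
    print(ranks)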
gsMap/spatial_ldsc_multiple_sumstats.py
CHANGED
@@ -2,6 +2,7 @@ import gc
 import logging
 import os
 from collections import defaultdict
+from functools import partial
 from pathlib import Path

 import anndata as ad
@@ -18,14 +19,15 @@ from gsMap.utils.regression_read import _read_sumstats, _read_w_ld, _read_ref_ld
 logger = logging.getLogger('gsMap.spatial_ldsc')


-
-
+def _coef_new(jknife, Nbar):
+    """Calculate coefficients adjusted by Nbar."""
     est_ = jknife.jknife_est[0, 0] / Nbar
     se_ = jknife.jknife_se[0, 0] / Nbar
     return est_, se_


 def append_intercept(x):
+    """Append an intercept term to the design matrix."""
     n_row = x.shape[0]
     intercept = np.ones((n_row, 1))
     x_new = np.concatenate((x, intercept), axis=1)
@@ -33,6 +35,7 @@ def append_intercept(x):


 def filter_sumstats_by_chisq(sumstats, chisq_max):
+    """Filter summary statistics based on chi-squared threshold."""
     before_len = len(sumstats)
     if chisq_max is None:
         chisq_max = max(0.001 * sumstats.N.max(), 80)
@@ -48,12 +51,14 @@ def filter_sumstats_by_chisq(sumstats, chisq_max):


 def aggregate(y, x, N, M, intercept=1):
+    """Aggregate function used in weight calculation."""
     num = M * (np.mean(y) - intercept)
     denom = np.mean(np.multiply(x, N))
     return num / denom


 def weights(ld, w_ld, N, M, hsq, intercept=1):
+    """Calculate weights for regression."""
     M = float(M)
     hsq = np.clip(hsq, 0.0, 1.0)
     ld = np.maximum(ld, 1.0)
@@ -65,178 +70,132 @@ def weights(ld, w_ld, N, M, hsq, intercept=1):
     return w


-def
-
+def get_weight_optimized(sumstats, x_tot_precomputed, M_tot, w_ld, intercept=1):
+    """Optimized function to calculate initial weights."""
+    tot_agg = aggregate(sumstats.chisq, x_tot_precomputed, sumstats.N, M_tot, intercept)
+    initial_w = weights(x_tot_precomputed, w_ld.LD_weights.values, sumstats.N.values, M_tot, tot_agg, intercept)
+    initial_w = np.sqrt(initial_w)
+    return initial_w
+
+
+def jackknife_for_processmap(spot_id, spatial_annotation, ref_ld_baseline_column_sum, sumstats, baseline_annotation, w_ld_common_snp, Nbar, n_blocks):
+    """Perform jackknife resampling for a given spot."""
     spot_spatial_annotation = spatial_annotation[:, spot_id]
     spot_x_tot_precomputed = spot_spatial_annotation + ref_ld_baseline_column_sum
-    initial_w = (
-
-        M_tot=10000, w_ld=w_ld_common_snp, intercept=1)
-        .astype(np.float32)
-        .reshape((-1, 1)))
-
-    # apply the weight to baseline annotation, spatial annotation and CHISQ
+    initial_w = get_weight_optimized(sumstats, x_tot_precomputed=spot_x_tot_precomputed,
+                                     M_tot=10000, w_ld=w_ld_common_snp, intercept=1).astype(np.float32).reshape((-1, 1))
     initial_w_scaled = initial_w / np.sum(initial_w)
     baseline_annotation_spot = baseline_annotation * initial_w_scaled
     spatial_annotation_spot = spot_spatial_annotation.reshape((-1, 1)) * initial_w_scaled
     CHISQ = sumstats.chisq.values.reshape((-1, 1))
     y = CHISQ * initial_w_scaled
-
-    # run the jackknife
-    x_focal = np.concatenate((spatial_annotation_spot,
-                              baseline_annotation_spot), axis=1)
+    x_focal = np.concatenate((spatial_annotation_spot, baseline_annotation_spot), axis=1)
     try:
         jknife = jk.LstsqJackknifeFast(x_focal, y, n_blocks)
-    # LinAlgError
     except np.linalg.LinAlgError as e:
         logger.warning(f'LinAlgError: {e}')
         return np.nan, np.nan
-    return _coef_new(jknife)
-
-
-# Updated function
-def get_weight_optimized(sumstats, x_tot_precomputed, M_tot, w_ld, intercept=1):
-    tot_agg = aggregate(sumstats.chisq, x_tot_precomputed, sumstats.N, M_tot, intercept)
-    initial_w = weights(x_tot_precomputed, w_ld.LD_weights.values, sumstats.N.values, M_tot, tot_agg, intercept)
-    initial_w = np.sqrt(initial_w)
-    return initial_w
+    return _coef_new(jknife, Nbar)


 def _preprocess_sumstats(trait_name, sumstat_file_path, baseline_and_w_ld_common_snp: pd.Index, chisq_max=None):
-
+    """Preprocess summary statistics."""
     sumstats = _read_sumstats(fh=sumstat_file_path, alleles=False, dropna=False)
     sumstats.set_index('SNP', inplace=True)
     sumstats = sumstats.astype(np.float32)
     sumstats = filter_sumstats_by_chisq(sumstats, chisq_max)
-
-    # NB: The intersection order is essential for keeping the same order of SNPs by its BP location
     common_snp = baseline_and_w_ld_common_snp.intersection(sumstats.index)
     if len(common_snp) < 200000:
         logger.warning(f'WARNING: number of SNPs less than 200k; for {trait_name} this is almost always bad.')
-
     sumstats = sumstats.loc[common_snp]
-
-    # get the common index position of baseline_and_w_ld_common_snp for quick access
     sumstats['common_index_pos'] = pd.Index(baseline_and_w_ld_common_snp).get_indexer(sumstats.index)
     return sumstats


-def
-
-    # first validate if all sumstats file exists
+def _get_sumstats_with_common_snp_from_sumstats_dict(sumstats_config_dict: dict, baseline_and_w_ld_common_snp: pd.Index, chisq_max=None):
+    """Get summary statistics with common SNPs among all traits."""
     logger.info('Validating sumstats files...')
     for trait_name, sumstat_file_path in sumstats_config_dict.items():
         if not os.path.exists(sumstat_file_path):
             raise FileNotFoundError(f'{sumstat_file_path} not found')
-    # then load all sumstats
     sumstats_cleaned_dict = {}
     for trait_name, sumstat_file_path in sumstats_config_dict.items():
-        sumstats_cleaned_dict[trait_name] = _preprocess_sumstats(trait_name, sumstat_file_path,
-
-
-
+        sumstats_cleaned_dict[trait_name] = _preprocess_sumstats(trait_name, sumstat_file_path, baseline_and_w_ld_common_snp, chisq_max)
+    common_snp_among_all_sumstats = None
+    for trait_name, sumstats in sumstats_cleaned_dict.items():
+        if common_snp_among_all_sumstats is None:
+            common_snp_among_all_sumstats = sumstats.index
+        else:
+            common_snp_among_all_sumstats = common_snp_among_all_sumstats.intersection(sumstats.index)
+    for trait_name, sumstats in sumstats_cleaned_dict.items():
+        sumstats_cleaned_dict[trait_name] = sumstats.loc[common_snp_among_all_sumstats]
+    logger.info(f'Common SNPs among all sumstats: {len(common_snp_among_all_sumstats)}')
+    return sumstats_cleaned_dict, common_snp_among_all_sumstats


 class S_LDSC_Boost_with_pre_calculate_SNP_Gene_weight_matrix:
+    """Class to handle pre-calculated SNP-Gene weight matrix for quick mode."""
     def __init__(self, config: SpatialLDSCConfig, common_snp_among_all_sumstats_pos):
         self.config = config
         mk_score = pd.read_feather(config.mkscore_feather_path).set_index('HUMAN_GENE_SYM')
         mk_score_genes = mk_score.index
-
         snp_gene_weight_adata = ad.read_h5ad(config.snp_gene_weight_adata_path)
         common_genes = mk_score_genes.intersection(snp_gene_weight_adata.var.index)
         common_snps = snp_gene_weight_adata.obs.index
-        # self.snp_gene_weight_adata = snp_gene_weight_adata[common_snp_among_all_sumstats:, common_genes.to_list()]
         self.snp_gene_weight_matrix = snp_gene_weight_adata[common_snp_among_all_sumstats_pos, common_genes.to_list()].X
         self.mk_score_common = mk_score.loc[common_genes]
-
-        # calculate the chunk number
         self.chunk_starts = list(range(0, self.mk_score_common.shape[1], self.config.spots_per_chunk_quick_mode))

     def fetch_ldscore_by_chunk(self, chunk_index):
+        """Fetch LD score by chunk."""
         chunk_start = self.chunk_starts[chunk_index]
-        mk_score_chunk = self.mk_score_common.iloc[:,
-
-        ldscore_chunk = self.calculate_ldscore_use_SNP_Gene_weight_matrix_by_chunk(
-            mk_score_chunk,
-            drop_dummy_na=False,
-        )
-
+        mk_score_chunk = self.mk_score_common.iloc[:, chunk_start:chunk_start + self.config.spots_per_chunk_quick_mode]
+        ldscore_chunk = self.calculate_ldscore_use_SNP_Gene_weight_matrix_by_chunk(mk_score_chunk, drop_dummy_na=False)
         spots_name = self.mk_score_common.columns[chunk_start:chunk_start + self.config.spots_per_chunk_quick_mode]
         return ldscore_chunk, spots_name

-    def calculate_ldscore_use_SNP_Gene_weight_matrix_by_chunk(self,
-
-                                                              drop_dummy_na=True,
-                                                              ):
-
+    def calculate_ldscore_use_SNP_Gene_weight_matrix_by_chunk(self, mk_score_chunk, drop_dummy_na=True):
+        """Calculate LD score using SNP-Gene weight matrix by chunk."""
         if drop_dummy_na:
             ldscore_chr_chunk = self.snp_gene_weight_matrix[:, :-1] @ mk_score_chunk
         else:
             ldscore_chr_chunk = self.snp_gene_weight_matrix @ mk_score_chunk
-
         return ldscore_chr_chunk


-def
-
-
-
-
-
-
-
-    for trait_name, sumstat_file_path in sumstats_config_dict.items():
-        sumstats_cleaned_dict[trait_name] = _preprocess_sumstats(trait_name, sumstat_file_path,
-                                                                 baseline_and_w_ld_common_snp, chisq_max)
-    # get the common snps among all sumstats
-    common_snp_among_all_sumstats = None
-    for trait_name, sumstats in sumstats_cleaned_dict.items():
-        if common_snp_among_all_sumstats is None:
-            common_snp_among_all_sumstats = sumstats.index
-        else:
-            common_snp_among_all_sumstats = common_snp_among_all_sumstats.intersection(sumstats.index)
-
-    # filter the common snps among all sumstats
-    for trait_name, sumstats in sumstats_cleaned_dict.items():
-        sumstats_cleaned_dict[trait_name] = sumstats.loc[common_snp_among_all_sumstats]
-
-    logger.info(f'Common SNPs among all sumstats: {len(common_snp_among_all_sumstats)}')
-    return sumstats_cleaned_dict, common_snp_among_all_sumstats
+def load_ldscore_chunk_from_feather(chunk_index, common_snp_among_all_sumstats_pos, config):
+    """Load LD score chunk from feather format."""
+    sample_name = config.sample_name
+    ld_file_spatial = f'{config.ldscore_save_dir}/{sample_name}_chunk{chunk_index}/{sample_name}.'
+    ref_ld_spatial = _read_ref_ld_v2(ld_file_spatial)
+    ref_ld_spatial = ref_ld_spatial.iloc[common_snp_among_all_sumstats_pos]
+    ref_ld_spatial = ref_ld_spatial.astype(np.float32, copy=False)
+    spatial_annotation_cnames = ref_ld_spatial.columns
+    return ref_ld_spatial.values, spatial_annotation_cnames


 def run_spatial_ldsc(config: SpatialLDSCConfig):
-
-
+    """Run spatial LDSC analysis."""
+    logger.info(f'------Running Spatial LDSC for {config.sample_name}...')
     n_blocks = config.n_blocks
     sample_name = config.sample_name

-
-    # Load the regression weights
+    # Load regression weights
     w_ld = _read_w_ld(config.w_file)
     w_ld_cname = w_ld.columns[1]
     w_ld.set_index('SNP', inplace=True)

     ld_file_baseline = f'{config.ldscore_save_dir}/baseline/baseline.'
-
     ref_ld_baseline = _read_ref_ld_v2(ld_file_baseline)
-    # n_annot_baseline = len(ref_ld_baseline.columns)
-    # M_annot_baseline = _read_M_v2(ld_file_baseline, n_annot_baseline, config.not_M_5_50)
-
-    # common snp between baseline and w_ld
     baseline_and_w_ld_common_snp = ref_ld_baseline.index.intersection(w_ld.index)
     baseline_and_w_ld_common_snp_pos = pd.Index(ref_ld_baseline.index).get_indexer(baseline_and_w_ld_common_snp)

-    # Clean the sumstats
     sumstats_cleaned_dict, common_snp_among_all_sumstats = _get_sumstats_with_common_snp_from_sumstats_dict(
-        config.sumstats_config_dict, baseline_and_w_ld_common_snp,
-        chisq_max=config.chisq_max)
+        config.sumstats_config_dict, baseline_and_w_ld_common_snp, chisq_max=config.chisq_max)
     common_snp_among_all_sumstats_pos = ref_ld_baseline.index.get_indexer(common_snp_among_all_sumstats)

-
-
-        common_snp_among_all_sumstats_pos).is_monotonic_increasing, 'common_snp_among_all_sumstats_pos is not monotonic increasing'
+    if not pd.Series(common_snp_among_all_sumstats_pos).is_monotonic_increasing:
+        raise ValueError('common_snp_among_all_sumstats_pos is not monotonic increasing')

     if len(common_snp_among_all_sumstats) < 200000:
         logger.warning(
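The hunk above consolidates the per-spot regression: get_weight_optimized is now defined before its single call site, jackknife_for_processmap receives every shared array explicitly (including Nbar, which _coef_new now takes as an argument), and each spot's coefficient and standard error come out of a block jackknife. gsMap's own jk.LstsqJackknifeFast is not shown in this diff; the following is only a generic numpy sketch of the block-jackknife idea, to make the beta/se pair returned per spot concrete:

    import numpy as np

    rng = np.random.default_rng(0)
    n, n_blocks = 1000, 10
    x = rng.normal(size=(n, 1))
    y = 0.5 * x[:, 0] + rng.normal(size=n)

    blocks = np.array_split(np.arange(n), n_blocks)
    delete_one = []
    for block in blocks:
        keep = np.setdiff1d(np.arange(n), block)      # drop one block of observations
        coef, *_ = np.linalg.lstsq(x[keep], y[keep], rcond=None)
        delete_one.append(coef[0])

    delete_one = np.asarray(delete_one)
    beta = delete_one.mean()                          # simplified jackknife point estimate
    se = np.sqrt((n_blocks - 1) / n_blocks * ((delete_one - beta) ** 2).sum())
    print(beta, se)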
@@ -245,136 +204,157 @@ def run_spatial_ldsc(config: SpatialLDSCConfig):
     ref_ld_baseline = ref_ld_baseline.loc[common_snp_among_all_sumstats]
     w_ld = w_ld.loc[common_snp_among_all_sumstats]

-    #
+    # Load additional baseline annotations if needed
     if config.use_additional_baseline_annotation:
-
+        logger.info('Using additional baseline annotations')
         ld_file_baseline_additional = f'{config.ldscore_save_dir}/additional_baseline/baseline.'
         ref_ld_baseline_additional = _read_ref_ld_v2(ld_file_baseline_additional)
-        n_annot_baseline_additional = len(ref_ld_baseline_additional.columns)
-        logger.info(f'{len(ref_ld_baseline_additional.columns)} additional baseline annotations loaded')
-        # M_annot_baseline_additional = _read_M_v2(ld_file_baseline_additional, n_annot_baseline_additional,
-        #                                          config.not_M_5_50)
         ref_ld_baseline_additional = ref_ld_baseline_additional.loc[common_snp_among_all_sumstats]
         ref_ld_baseline = pd.concat([ref_ld_baseline, ref_ld_baseline_additional], axis=1)
         del ref_ld_baseline_additional

-    #
+    # Initialize s_ldsc once if quick_mode
+    s_ldsc = None
     if config.ldscore_save_format == 'quick_mode':
         s_ldsc = S_LDSC_Boost_with_pre_calculate_SNP_Gene_weight_matrix(config, common_snp_among_all_sumstats_pos)
         total_chunk_number_found = len(s_ldsc.chunk_starts)
-
+        logger.info(f'Split data into {total_chunk_number_found} chunks')
     else:
-
-        total_chunk_number_found = sum('chunk' in name for name in all_file)
-        print(f'Find {total_chunk_number_found} chunked files in {config.ldscore_save_dir}')
-
-    if config.all_chunk is None:
-        if config.chunk_range is not None:
-            assert config.chunk_range[0] >= 1 and config.chunk_range[
-                1] <= total_chunk_number_found, 'Chunk range out of bound. It should be in [1, all_chunk]'
-            print(
-                f'chunk range provided, using chunked files from {config.chunk_range[0]} to {config.chunk_range[1]}')
-            start_chunk, end_chunk = config.chunk_range
-        else:
-            start_chunk, end_chunk = 1, total_chunk_number_found
-    else:
-        all_chunk = config.all_chunk
-        print(f'using {all_chunk} chunked files by provided argument')
-        print(f'\t')
-        print(f'Input {all_chunk} chunked files')
-        start_chunk, end_chunk = 1, all_chunk
+        total_chunk_number_found = determine_total_chunks(config)

+    start_chunk, end_chunk = determine_chunk_range(config, total_chunk_number_found)
     running_chunk_number = end_chunk - start_chunk + 1

-    #
-
-    zarr_path = Path(config.ldscore_save_dir) / f'{config.sample_name}.ldscore.zarr'
+    # Load zarr file if needed
+    zarr_file, spots_name = None, None
     if config.ldscore_save_format == 'zarr':
-
+        zarr_path = Path(config.ldscore_save_dir) / f'{config.sample_name}.ldscore.zarr'
+        if not zarr_path.exists():
+            raise FileNotFoundError(f'{zarr_path} not found, which is required for zarr format')
         zarr_file = zarr.open(str(zarr_path))
         spots_name = zarr_file.attrs['spot_names']

+    output_dict = defaultdict(list)
     for chunk_index in range(start_chunk, end_chunk + 1):
-
-
-
-
-
-
-
-
-            ref_ld_spatial = ref_ld_spatial.astype(np.float32, copy=False)
-            spatial_annotation_cnames = spots_name[start_spot:start_spot + zarr_file.chunks[1]]
-        elif config.ldscore_save_format == 'quick_mode':
-            ref_ld_spatial, spatial_annotation_cnames = s_ldsc.fetch_ldscore_by_chunk(chunk_index - 1)
-        else:
-            raise ValueError(f'Invalid ld score save format: {config.ldscore_save_format}')
-
-        # get the x_tot_precomputed matrix by adding baseline and spatial annotation
+        ref_ld_spatial, spatial_annotation_cnames = load_ldscore_chunk(
+            chunk_index,
+            common_snp_among_all_sumstats_pos,
+            config,
+            zarr_file,
+            spots_name,
+            s_ldsc  # Pass s_ldsc to the function
+        )
         ref_ld_baseline_column_sum = ref_ld_baseline.sum(axis=1).values
-        # x_tot_precomputed = ref_ld_spatial + ref_ld_baseline_column_sum

         for trait_name, sumstats in sumstats_cleaned_dict.items():
-
             spatial_annotation = ref_ld_spatial.astype(np.float32, copy=False)
             baseline_annotation = ref_ld_baseline.copy().astype(np.float32, copy=False)
             w_ld_common_snp = w_ld.astype(np.float32, copy=False)

-            # weight the baseline annotation by N
             baseline_annotation = baseline_annotation * sumstats.N.values.reshape((-1, 1)) / sumstats.N.mean()
-            # append intercept
             baseline_annotation = append_intercept(baseline_annotation)

-            # Run the jackknife
             Nbar = sumstats.N.mean()
             chunk_size = spatial_annotation.shape[1]
-
-
-
-
-
-
-
-
-
-
-
+
+            jackknife_func = partial(
+                jackknife_for_processmap,
+                spatial_annotation=spatial_annotation,
+                ref_ld_baseline_column_sum=ref_ld_baseline_column_sum,
+                sumstats=sumstats,
+                baseline_annotation=baseline_annotation,
+                w_ld_common_snp=w_ld_common_snp,
+                Nbar=Nbar,
+                n_blocks=n_blocks
+            )
+
+            out_chunk = thread_map(
+                jackknife_func,
+                range(chunk_size),
+                max_workers=config.num_processes,
+                chunksize=10,
+                desc=f'Chunk-{chunk_index}/Total-chunk-{running_chunk_number} for {trait_name}',
+            )
+
+            out_chunk = pd.DataFrame.from_records(out_chunk, columns=['beta', 'se'], index=spatial_annotation_cnames)
             nan_spots = out_chunk[out_chunk.isna().any(axis=1)].index
             if len(nan_spots) > 0:
                 logger.info(f'Nan spots: {nan_spots} in chunk-{chunk_index} for {trait_name}. They are removed.')
-            # drop the nan
             out_chunk = out_chunk.dropna()
-
             out_chunk['z'] = out_chunk.beta / out_chunk.se
             out_chunk['p'] = norm.sf(out_chunk['z'])
             output_dict[trait_name].append(out_chunk)

-
-
+        del spatial_annotation, baseline_annotation, w_ld_common_snp
+        gc.collect()
+
+    save_results(output_dict, config, running_chunk_number, start_chunk, end_chunk)
+    logger.info(f'------Spatial LDSC for {sample_name} finished!')
+

-
+def determine_total_chunks(config):
+    """Determine total number of chunks based on the ldscore save format."""
+    if config.ldscore_save_format == 'quick_mode':
+        s_ldsc = S_LDSC_Boost_with_pre_calculate_SNP_Gene_weight_matrix(config, [])
+        total_chunk_number_found = len(s_ldsc.chunk_starts)
+        logger.info(f'Split data into {total_chunk_number_found} chunks')
+    else:
+        all_file = os.listdir(config.ldscore_save_dir)
+        total_chunk_number_found = sum('chunk' in name for name in all_file)
+        logger.info(f'Find {total_chunk_number_found} chunked files in {config.ldscore_save_dir}')
+    return total_chunk_number_found
+
+
+def determine_chunk_range(config, total_chunk_number_found):
+    """Determine the range of chunks to process."""
+    if config.all_chunk is None:
+        if config.chunk_range is not None:
+            if not (1 <= config.chunk_range[0] <= total_chunk_number_found) or not (1 <= config.chunk_range[1] <= total_chunk_number_found):
+                raise ValueError('Chunk range out of bound. It should be in [1, all_chunk]')
+            start_chunk, end_chunk = config.chunk_range
+            logger.info(f'Chunk range provided, using chunked files from {start_chunk} to {end_chunk}')
+        else:
+            start_chunk, end_chunk = 1, total_chunk_number_found
+    else:
+        all_chunk = config.all_chunk
+        logger.info(f'Using {all_chunk} chunked files by provided argument')
+        start_chunk, end_chunk = 1, all_chunk
+    return start_chunk, end_chunk
+
+
+def load_ldscore_chunk(chunk_index, common_snp_among_all_sumstats_pos, config, zarr_file=None, spots_name=None, s_ldsc=None):
+    """Load LD score chunk based on save format."""
+    if config.ldscore_save_format == 'feather':
+        return load_ldscore_chunk_from_feather(chunk_index, common_snp_among_all_sumstats_pos, config)
+    elif config.ldscore_save_format == 'zarr':
+        ref_ld_spatial = zarr_file.blocks[:, chunk_index - 1][common_snp_among_all_sumstats_pos]
+        start_spot = (chunk_index - 1) * zarr_file.chunks[1]
+        ref_ld_spatial = ref_ld_spatial.astype(np.float32, copy=False)
+        spatial_annotation_cnames = spots_name[start_spot:start_spot + zarr_file.chunks[1]]
+        return ref_ld_spatial, spatial_annotation_cnames
+    elif config.ldscore_save_format == 'quick_mode':
+        # Use the pre-initialized s_ldsc
+        if s_ldsc is None:
+            raise ValueError("s_ldsc must be provided in quick_mode")
+        return s_ldsc.fetch_ldscore_by_chunk(chunk_index - 1)
+    else:
+        raise ValueError(f'Invalid ld score save format: {config.ldscore_save_format}')
+
+
+def save_results(output_dict, config, running_chunk_number, start_chunk, end_chunk):
+    """Save the results to the specified directory."""
     out_dir = config.ldsc_save_dir
     for trait_name, out_chunk_list in output_dict.items():
         out_all = pd.concat(out_chunk_list, axis=0)
-
+        sample_name = config.sample_name
+        if running_chunk_number == end_chunk - start_chunk + 1:
             out_file_name = out_dir / f'{sample_name}_{trait_name}.csv.gz'
         else:
             out_file_name = out_dir / f'{sample_name}_{trait_name}_chunk{start_chunk}-{end_chunk}.csv.gz'
         out_all['spot'] = out_all.index
         out_all = out_all[['spot', 'beta', 'se', 'z', 'p']]
-        out_all.to_csv(out_file_name, compression='gzip', index=False)
-
-        logger.info(f'Output saved to {out_file_name} for {trait_name}')
-    logger.info(f'------Spatial LDSC for {sample_name} finished!')
-
-
-def load_ldscore_chunk_from_feather(chunk_index, common_snp_among_all_sumstats_pos, config, ):
-    # Load the spatial annotations for this chunk
-    sample_name = config.sample_name
-    ld_file_spatial = f'{config.ldscore_save_dir}/{sample_name}_chunk{chunk_index}/{sample_name}.'
-    ref_ld_spatial = _read_ref_ld_v2(ld_file_spatial)
-    ref_ld_spatial = ref_ld_spatial.iloc[common_snp_among_all_sumstats_pos]
-    ref_ld_spatial = ref_ld_spatial.astype(np.float32, copy=False)

-
-
+        # clip the p-values
+        out_all['p'] = out_all['p'].clip(1e-300, 1)
+        out_all.to_csv(out_file_name, compression='gzip', index=False)
+        logger.info(f'Output saved to {out_file_name} for {trait_name}')
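The run_spatial_ldsc body is now organised around helper functions (determine_total_chunks, determine_chunk_range, load_ldscore_chunk, save_results), and the per-trait loop builds one functools.partial that freezes the shared arrays so tqdm's thread_map only has to iterate over spot indices; p-values are also clipped into [1e-300, 1] before writing, which keeps -log10(p) finite downstream. A generic sketch of the partial + thread_map pattern with toy data (not gsMap's actual arrays or worker function):

    from functools import partial

    import numpy as np
    from tqdm.contrib.concurrent import thread_map

    def per_spot(spot_id, shared_matrix, offset):
        # Stand-in for jackknife_for_processmap: combines the shared matrix with one spot index.
        return float(shared_matrix[:, spot_id].sum() + offset)

    shared = np.arange(12, dtype=np.float32).reshape(3, 4)
    func = partial(per_spot, shared_matrix=shared, offset=1.0)
    results = thread_map(func, range(shared.shape[1]), max_workers=2, chunksize=2, desc='toy chunk')
    print(results)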
{gsmap-1.71.dist-info → gsmap-1.71.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.3
 Name: gsMap
-Version: 1.71
+Version: 1.71.2
 Summary: Genetics-informed pathogenic spatial mapping
 Author-email: liyang <songliyang@westlake.edu.cn>, wenhao <chenwenhao@westlake.edu.cn>
 Requires-Python: >=3.8
@@ -11,7 +11,7 @@ Classifier: License :: OSI Approved :: MIT License
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Operating System :: POSIX :: Linux
-Requires-Dist: numpy
+Requires-Dist: numpy < 2.0.0
 Requires-Dist: pandas
 Requires-Dist: scipy
 Requires-Dist: scikit-learn
@@ -28,7 +28,7 @@ Requires-Dist: kaleido
 Requires-Dist: jinja2
 Requires-Dist: scanpy >=1.8.0
 Requires-Dist: zarr
-Requires-Dist: bitarray
+Requires-Dist: bitarray >=2.9.2, <3.0.0
 Requires-Dist: pyarrow
 Requires-Dist: scikit-misc
 Requires-Dist: sphinx ; extra == "doc"
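Two dependency pins are tightened in this release: numpy is held below 2.0.0 and bitarray is constrained to >=2.9.2,<3.0.0. An optional, environment-side check of those specifiers (this uses the third-party packaging library, which is an assumption here, not a gsMap requirement):

    from importlib.metadata import version
    from packaging.specifiers import SpecifierSet

    print('numpy', version('numpy'), version('numpy') in SpecifierSet('<2.0.0'))
    print('bitarray', version('bitarray'), version('bitarray') in SpecifierSet('>=2.9.2,<3.0.0'))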
{gsmap-1.71.dist-info → gsmap-1.71.2.dist-info}/RECORD
CHANGED
@@ -1,21 +1,21 @@
-gsMap/__init__.py,sha256=
+gsMap/__init__.py,sha256=TKJMAFwuDpLdq1rs6HhXdBwKqbdPXrH9doabIfnGCRg,76
 gsMap/__main__.py,sha256=Ih3PPSJezd9UpmkdfH1PXYYecCpk6hkQF039BQU1ocs,60
 gsMap/cauchy_combination_test.py,sha256=jMXNKWajI5NSmKeKu5PVJMYMtiApL6ZeYrSOSqC-ESU,5553
-gsMap/config.py,sha256=
-gsMap/diagnosis.py,sha256=
-gsMap/find_latent_representation.py,sha256=
+gsMap/config.py,sha256=eddRqfld9vgkjrXII2wKVbzAKJuiZzxvp6e4HqYb0cw,40386
+gsMap/diagnosis.py,sha256=MMOrKn_TV1fWeSbgXQ2gar9uYHelJUzzEK4WTId5ei0,13117
+gsMap/find_latent_representation.py,sha256=ZarK-rjjBu3UUJqz0GxYsxcxE5A6RdZdeSC5bSP4x48,4807
 gsMap/format_sumstats.py,sha256=vPJD5qdqd_CjHw5MkihYpSMLcDx7wsNySsmoXRElaW0,13439
 gsMap/generate_ldscore.py,sha256=s0evdoGJ2QKZYMxHGm_z01yf7yE6RWa1iUOJzSh-OZY,28471
-gsMap/latent_to_gene.py,sha256=
+gsMap/latent_to_gene.py,sha256=HtO3RSHQ_yeicqGC8iJt65OIq_akoKJhUfIdfb3k3kk,10622
 gsMap/main.py,sha256=b3pLWvc70UlO24QXW9Zl7Nd7rlErxlQSM5Y_RqwcNNM,1254
 gsMap/report.py,sha256=pr7K8zEyunun5LHzRtLBaoLjsw8-PfdbJWKMvsTxoj0,6803
 gsMap/run_all_mode.py,sha256=fXO5QNZbkQpQjkZTznqq5Ct1OihIRiFb4iwGHtnEryA,8194
 gsMap/setup.py,sha256=eOoPalGAHTY06_7a35nvbOaKQhq0SBE5GK4pc4bp3wc,102
-gsMap/spatial_ldsc_multiple_sumstats.py,sha256=
+gsMap/spatial_ldsc_multiple_sumstats.py,sha256=YrqTWC7FHmimGhhh-aoxIUzwouPFI02fbimBByu3gVw,17431
 gsMap/visualize.py,sha256=pLp8iyh0L7UU405LfpjEgbGuKbf19_4YyMHZWo6wE7M,7255
 gsMap/GNN/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 gsMap/GNN/adjacency_matrix.py,sha256=nlzIW4b6JSf77SCsjSk75eSEGiwBZLEQQ0D0mpIy4go,3258
-gsMap/GNN/model.py,sha256=
+gsMap/GNN/model.py,sha256=W4jPnaSDUUEacvGH5sNACNlbbO79-6hwLtSudyXIYgM,2909
 gsMap/GNN/train.py,sha256=s9F3_eeOlTzqOra2jHvtV3SDp6fKkt_AVTnssNueruQ,3094
 gsMap/templates/report_template.html,sha256=QODZEbVxpW1xsLz7lDrD_DyUfzYoi9E17o2tLJlf8OQ,8016
 gsMap/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -24,8 +24,8 @@ gsMap/utils/jackknife.py,sha256=d3OVLAoU8W0MYdZBT4-8GPAdZDN9aNHnDHwSh4AkxP8,1773
 gsMap/utils/make_annotations.py,sha256=X9vxrH8A0oqrGJ-C82H3yactMDLpSM-mYA_8DurxPzE,21679
 gsMap/utils/manhattan_plot.py,sha256=vS8Avgn9kbYa6HerRKpRNPYgJyyh5mE2f9p1BuHwwS8,25596
 gsMap/utils/regression_read.py,sha256=oivyNicRdLLQvBv36GoR0AweUlvuCmBduTVRPpGhNRU,8474
-gsmap-1.71.dist-info/entry_points.txt,sha256=s_P2Za22O077tc1FPLKMinbdRVXaN_HTcDBgWMYpqA4,41
-gsmap-1.71.dist-info/LICENSE,sha256=dCWx-ENipnYph2UTEA9wJaEZ_tkjNZ_tog6XRd3nd2k,1073
-gsmap-1.71.dist-info/WHEEL,sha256
-gsmap-1.71.dist-info/METADATA,sha256=
-gsmap-1.71.dist-info/RECORD,,
+gsmap-1.71.2.dist-info/entry_points.txt,sha256=s_P2Za22O077tc1FPLKMinbdRVXaN_HTcDBgWMYpqA4,41
+gsmap-1.71.2.dist-info/LICENSE,sha256=dCWx-ENipnYph2UTEA9wJaEZ_tkjNZ_tog6XRd3nd2k,1073
+gsmap-1.71.2.dist-info/WHEEL,sha256=-ta_u8-23-bwm4JkHhR_rJkpb_X1cxtvIsLH0KOv1sU,82
+gsmap-1.71.2.dist-info/METADATA,sha256=sObxX5A_Pt26uqYQ8G4gpkrthY81qvkC4p1pon4j-sM,3717
+gsmap-1.71.2.dist-info/RECORD,,
{gsmap-1.71.dist-info → gsmap-1.71.2.dist-info}/LICENSE
File without changes
{gsmap-1.71.dist-info → gsmap-1.71.2.dist-info}/entry_points.txt
File without changes