gsMap 1.71.1-py3-none-any.whl → 1.72.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gsMap/GNN/__init__.py +0 -0
- gsMap/GNN/adjacency_matrix.py +73 -75
- gsMap/GNN/model.py +92 -90
- gsMap/GNN/train.py +8 -11
- gsMap/__init__.py +5 -5
- gsMap/__main__.py +4 -3
- gsMap/cauchy_combination_test.py +144 -141
- gsMap/config.py +1312 -805
- gsMap/create_slice_mean.py +154 -0
- gsMap/diagnosis.py +352 -273
- gsMap/find_latent_representation.py +141 -133
- gsMap/format_sumstats.py +439 -407
- gsMap/generate_ldscore.py +762 -618
- gsMap/latent_to_gene.py +284 -234
- gsMap/main.py +40 -31
- gsMap/report.py +174 -160
- gsMap/run_all_mode.py +235 -195
- gsMap/setup.py +1 -1
- gsMap/spatial_ldsc_multiple_sumstats.py +434 -380
- gsMap/templates/report_template.html +198 -198
- gsMap/utils/__init__.py +0 -0
- gsMap/utils/generate_r2_matrix.py +768 -735
- gsMap/utils/jackknife.py +518 -514
- gsMap/utils/manhattan_plot.py +612 -639
- gsMap/utils/regression_read.py +277 -294
- gsMap/visualize.py +217 -199
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/LICENSE +21 -21
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/METADATA +23 -8
- gsmap-1.72.3.dist-info/RECORD +31 -0
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/WHEEL +1 -1
- gsMap/utils/make_annotations.py +0 -518
- gsmap-1.71.1.dist-info/RECORD +0 -31
- {gsmap-1.71.1.dist-info → gsmap-1.72.3.dist-info}/entry_points.txt +0 -0
gsMap/cauchy_combination_test.py
CHANGED
@@ -1,141 +1,144 @@
-import logging
-from pathlib import Path
-
-import numpy as np
-import pandas as pd
-import scanpy as sc
-import scipy as sp
-
-from gsMap.config import CauchyCombinationConfig
-
-logger = logging.getLogger(__name__)
[... the remaining 130 removed lines (the 1.71.1 bodies of acat_test and run_Cauchy_combination) are truncated in the diff view; only fragments such as "if any(", "raise Exception(", "pval = 1", "# Load the", "ldsc =", and "output_file =" survive ...]
+import logging
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
+import scanpy as sc
+import scipy as sp
+
+from gsMap.config import CauchyCombinationConfig
+
+logger = logging.getLogger(__name__)
+
+
+# The fun of cauchy combination
+def acat_test(pvalues, weights=None):
+    """acat_test()
+    Aggregated Cauchy Assocaition Test
+    A p-value combination method using the Cauchy distribution.
+
+    Inspired by: https://github.com/yaowuliu/ACAT/blob/master/R/ACAT.R
+    Inputs:
+        pvalues: <list or numpy array>
+            The p-values you want to combine.
+        weights: <list or numpy array>, default=None
+            The weights for each of the p-values. If None, equal weights are used.
+
+    Returns
+    -------
+    pval: <float>
+        The ACAT combined p-value.
+    """
+    if any(np.isnan(pvalues)):
+        raise Exception("Cannot have NAs in the p-values.")
+    if any((i > 1) | (i < 0) for i in pvalues):
+        raise Exception("P-values must be between 0 and 1.")
+    if any(i == 1 for i in pvalues) & any(i == 0 for i in pvalues):
+        raise Exception("Cannot have both 0 and 1 p-values.")
+    if any(i == 0 for i in pvalues):
+        logger.info("Warn: p-values are exactly 0.")
+        return 0
+    if any(i == 1 for i in pvalues):
+        logger.info("Warn: p-values are exactly 1.")
+        return 1
+    if weights is None:
+        weights = [1 / len(pvalues) for i in pvalues]
+    elif len(weights) != len(pvalues):
+        raise Exception("Length of weights and p-values differs.")
+    elif any(i < 0 for i in weights):
+        raise Exception("All weights must be positive.")
+    else:
+        weights = [i / len(weights) for i in weights]
+
+    pvalues = np.array(pvalues)
+    weights = np.array(weights)
+
+    if not any(i < 1e-16 for i in pvalues):
+        cct_stat = sum(weights * np.tan((0.5 - pvalues) * np.pi))
+    else:
+        is_small = [i < (1e-16) for i in pvalues]
+        is_large = [i >= (1e-16) for i in pvalues]
+        cct_stat = sum((weights[is_small] / pvalues[is_small]) / np.pi)
+        cct_stat += sum(weights[is_large] * np.tan((0.5 - pvalues[is_large]) * np.pi))
+
+    if cct_stat > 1e15:
+        pval = (1 / cct_stat) / np.pi
+    else:
+        pval = 1 - sp.stats.cauchy.cdf(cct_stat)
+
+    return pval
+
+
+def run_Cauchy_combination(config: CauchyCombinationConfig):
+    ldsc_list = []
+
+    for sample_name in config.sample_name_list:
+        config.sample_name = sample_name
+
+        # Load the LDSC results for the current sample
+        logger.info(f"------Loading LDSC results for sample {sample_name}...")
+        ldsc_input_file = config.get_ldsc_result_file(
+            trait_name=config.trait_name,
+        )
+        ldsc = pd.read_csv(ldsc_input_file, compression="gzip")
+        ldsc["spot"] = ldsc["spot"].astype(str)
+        ldsc.index = ldsc["spot"]
+
+        # Load the spatial transcriptomics (ST) data for the current sample
+        logger.info(f"------Loading ST data for sample {sample_name}...")
+        h5ad_file = config.hdf5_with_latent_path
+        adata = sc.read_h5ad(h5ad_file)
+
+        # Identify common cells between LDSC results and ST data
+        common_cells = np.intersect1d(ldsc.index, adata.obs_names)
+        adata = adata[common_cells]
+        ldsc = ldsc.loc[common_cells]
+
+        # Add annotations to the LDSC dataframe
+        ldsc["annotation"] = adata.obs.loc[ldsc.spot, config.annotation].to_list()
+        ldsc_list.append(ldsc)
+
+    # Concatenate all LDSC dataframes from different samples
+    ldsc_all = pd.concat(ldsc_list)
+
+    # Run the Cauchy combination
+    p_cauchy = []
+    p_median = []
+    annotations = ldsc_all["annotation"].unique()
+
+    for ct in annotations:
+        p_values = ldsc_all.loc[ldsc_all["annotation"] == ct, "p"]
+
+        # Handle extreme outliers to enhance robustness
+        p_values_log = -np.log10(p_values)
+        median_log = np.median(p_values_log)
+        iqr_log = np.percentile(p_values_log, 75) - np.percentile(p_values_log, 25)
+
+        p_values_filtered = p_values[p_values_log < median_log + 3 * iqr_log]
+        n_removed = len(p_values) - len(p_values_filtered)
+
+        # Remove outliers if the number is reasonable
+        if 0 < n_removed < 20:
+            logger.info(f"Removed {n_removed}/{len(p_values)} outliers (median + 3IQR) for {ct}.")
+            p_cauchy_temp = acat_test(p_values_filtered)
+        else:
+            p_cauchy_temp = acat_test(p_values)
+
+        p_median_temp = np.median(p_values)
+        p_cauchy.append(p_cauchy_temp)
+        p_median.append(p_median_temp)
+
+    # Prepare the results dataframe
+    results = pd.DataFrame({"annotation": annotations, "p_cauchy": p_cauchy, "p_median": p_median})
+    results.sort_values(by="p_cauchy", inplace=True)
+
+    # Save the results
+    Path(config.output_file).parent.mkdir(parents=True, exist_ok=True, mode=0o755)
+    output_file = Path(config.output_file)
+    results.to_csv(
+        output_file,
+        compression="gzip",
+        index=False,
+    )
+    logger.info(f"Cauchy combination results saved at {output_file}.")
+    return results
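A quick way to sanity-check the combination rule outside of gsMap is to reproduce the main branch of acat_test by hand. The sketch below uses made-up p-values for a single annotation (the numbers are illustrative, not package output) and relies only on numpy and scipy, which the module already imports:

import numpy as np
from scipy import stats

# Toy p-values for one annotation (hypothetical numbers, not gsMap output).
pvalues = np.array([0.01, 0.20, 0.03, 0.50, 0.004])
weights = np.full(len(pvalues), 1.0 / len(pvalues))  # equal weights, the acat_test default

# The per-annotation aggregation above first drops extreme spots whose -log10(p)
# exceeds median + 3*IQR; none of these toy values cross that threshold.
logp = -np.log10(pvalues)
keep = logp < np.median(logp) + 3 * (np.percentile(logp, 75) - np.percentile(logp, 25))

# Cauchy combination statistic, matching the acat_test branch where no p-value is below 1e-16.
cct_stat = np.sum(weights[keep] * np.tan((0.5 - pvalues[keep]) * np.pi))
p_combined = 1 - stats.cauchy.cdf(cct_stat)

print(p_combined)          # combined p-value for the annotation (what run_Cauchy_combination stores as p_cauchy)
print(np.median(pvalues))  # the companion p_median written alongside it

Calling acat_test(pvalues) with no weights argument should give the same combined value here, since the function falls back to equal weights and applies the same tan/Cauchy-CDF transformation.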