gsMap 1.63__tar.gz → 1.65__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {gsmap-1.63 → gsmap-1.65}/PKG-INFO +6 -1
- {gsmap-1.63 → gsmap-1.65}/pyproject.toml +5 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/__init__.py +1 -1
- gsmap-1.65/src/gsMap/setup.py +5 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/spatial_ldsc_multiple_sumstats.py +2 -2
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/generate_r2_matrix.py +1 -1
- gsmap-1.63/test/GPS-snakemake-workflow-macaque.smk +0 -268
- gsmap-1.63/test/GPS-snakemake-workflow.smk +0 -229
- {gsmap-1.63 → gsmap-1.65}/.github/workflows/publish-to-pypi.yml +0 -0
- {gsmap-1.63 → gsmap-1.65}/.gitignore +0 -0
- {gsmap-1.63 → gsmap-1.65}/LICENSE +0 -0
- {gsmap-1.63 → gsmap-1.65}/README.md +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/Makefile +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/make.bat +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/requirements.txt +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/_static/schematic.svg +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/api/cauchy_combination.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/api/find_latent_representations.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/api/format_sumstats.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/api/generate_ldscore.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/api/latent_to_gene.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/api/quick_mode.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/api/report.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/api/spatial_ldsc.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/api.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_Height.json +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_IQ.json +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_MCHC.json +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/charts/cortex/Cortex_151507_SCZ.json +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_Height.json +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_IQ.json +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_MCHC.json +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/charts/mouse_embryo/E16.5_E1S1_SCZ.json +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/charts/test.json +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/conf.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/data.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/data_format.md +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/index.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/install.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/mouse.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/mouse_example.md +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/quick_mode.md +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/release.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/docs/source/tutorials.rst +0 -0
- {gsmap-1.63 → gsmap-1.65}/schematic.png +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/GNN_VAE/__init__.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/GNN_VAE/adjacency_matrix.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/GNN_VAE/model.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/GNN_VAE/train.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/__main__.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/cauchy_combination_test.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/config.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/diagnosis.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/find_latent_representation.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/format_sumstats.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/generate_ldscore.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/latent_to_gene.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/main.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/report.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/run_all_mode.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/templates/report_template.html +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/__init__.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/jackknife.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/make_annotations.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/manhattan_plot.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/utils/regression_read.py +0 -0
- {gsmap-1.63 → gsmap-1.65}/src/gsMap/visualize.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: gsMap
|
3
|
-
Version: 1.63
|
3
|
+
Version: 1.65
|
4
4
|
Summary: Genetics-informed pathogenic spatial mapping
|
5
5
|
Author-email: liyang <songliyang@westlake.edu.cn>, wenhao <chenwenhao@westlake.edu.cn>
|
6
6
|
Requires-Python: >=3.8
|
@@ -27,6 +27,11 @@ Requires-Dist: pyfiglet
|
|
27
27
|
Requires-Dist: plotly
|
28
28
|
Requires-Dist: kaleido
|
29
29
|
Requires-Dist: jinja2
|
30
|
+
Requires-Dist: scanpy
|
31
|
+
Requires-Dist: zarr
|
32
|
+
Requires-Dist: bitarray
|
33
|
+
Requires-Dist: pyarrow
|
34
|
+
Requires-Dist: scikit-misc
|
30
35
|
Requires-Dist: sphinx ; extra == "doc"
|
31
36
|
Requires-Dist: sphinx-argparse ; extra == "doc"
|
32
37
|
Requires-Dist: sphinx-autobuild ; extra == "doc"
|
@@ -15,7 +15,7 @@ import gsMap.utils.jackknife as jk
|
|
15
15
|
from gsMap.config import SpatialLDSCConfig
|
16
16
|
from gsMap.utils.regression_read import _read_sumstats, _read_w_ld, _read_ref_ld_v2
|
17
17
|
|
18
|
-
logger = logging.getLogger(
|
18
|
+
logger = logging.getLogger('gsMap.spatial_ldsc')
|
19
19
|
|
20
20
|
|
21
21
|
# %%
|
@@ -204,7 +204,7 @@ def _get_sumstats_with_common_snp_from_sumstats_dict(sumstats_config_dict: dict,
|
|
204
204
|
for trait_name, sumstats in sumstats_cleaned_dict.items():
|
205
205
|
sumstats_cleaned_dict[trait_name] = sumstats.loc[common_snp_among_all_sumstats]
|
206
206
|
|
207
|
-
logger.info(f'
|
207
|
+
logger.info(f'Common SNPs among all sumstats: {len(common_snp_among_all_sumstats)}')
|
208
208
|
return sumstats_cleaned_dict, common_snp_among_all_sumstats
|
209
209
|
|
210
210
|
|
@@ -1,268 +0,0 @@
|
|
1
|
-
from pathlib import Path
|
2
|
-
|
3
|
-
import numpy as np
|
4
|
-
|
5
|
-
workdir: '/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/macaque/processed'
|
6
|
-
# workdir: '/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/GPS_test/macaque'
|
7
|
-
sample_name = "Cortex_151507"
|
8
|
-
chrom = "all"
|
9
|
-
QOS = "huge"
|
10
|
-
# chrom = range(1,23)
|
11
|
-
trait_names = [
|
12
|
-
'PGC3_SCZ_wave3_public_INFO80'
|
13
|
-
]
|
14
|
-
root = "/storage/yangjianLab/songliyang/SpatialData/Data/Brain/macaque/Cell/processed/h5ad"
|
15
|
-
# sample_names = [file.strip().split('.')[0]
|
16
|
-
# for file in open(f'{root}/representative_slices2').readlines()]
|
17
|
-
#
|
18
|
-
# sample_names = '''
|
19
|
-
# T33_macaque1 T44_macaque1 T82_macaque1 T97_macaque1 T125_macaque1 T127_macaque1 T129_macaque1 T131_macaque1 T135_macaque1 T137_macaque1 T139_macaque1
|
20
|
-
# '''.strip().split()
|
21
|
-
sample_names=[]
|
22
|
-
for file in Path(root).glob('*.h5ad'):
|
23
|
-
sample_names.append(file.stem)
|
24
|
-
sample_names.remove('T825_macaque3') # due to 25% of spot don't have spatial coordinates
|
25
|
-
|
26
|
-
annotation = "SubClass"
|
27
|
-
data_type = "SCT"
|
28
|
-
# sample_names = ['T584_macaque2']
|
29
|
-
|
30
|
-
rule all:
|
31
|
-
input:
|
32
|
-
expand('{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done',trait_name=trait_names,sample_name=sample_names)
|
33
|
-
|
34
|
-
|
35
|
-
# expand('{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz',trait_name=trait_names,sample_name=sample_names)
|
36
|
-
# expand('{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz',trait_name=trait_names,sample_name=sample_names)
|
37
|
-
|
38
|
-
rule test_run:
|
39
|
-
input:
|
40
|
-
[f'{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done' for sample_name in
|
41
|
-
sample_names]
|
42
|
-
|
43
|
-
# localrules: find_latent_representations,latent_to_gene
|
44
|
-
def get_annotation(wildcards):
|
45
|
-
if wildcards.sample_name.endswith('3'):
|
46
|
-
print(wildcards.sample_name,'will use None as annotation')
|
47
|
-
return None
|
48
|
-
else:
|
49
|
-
print(wildcards.sample_name,'will use SubClass as annotation')
|
50
|
-
return 'SubClass'
|
51
|
-
|
52
|
-
|
53
|
-
rule find_latent_representations:
|
54
|
-
input:
|
55
|
-
hdf5_path=f'{root}/{{sample_name}}.h5ad'
|
56
|
-
output:
|
57
|
-
hdf5_output='{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad'
|
58
|
-
params:
|
59
|
-
annotation= get_annotation,
|
60
|
-
type=data_type,
|
61
|
-
epochs=300,
|
62
|
-
feat_hidden1=256,
|
63
|
-
feat_hidden2=128,
|
64
|
-
feat_cell=3000,
|
65
|
-
gcn_hidden1=64,
|
66
|
-
gcn_hidden2=30,
|
67
|
-
p_drop=0.1,
|
68
|
-
gcn_lr=0.001,
|
69
|
-
gcn_decay=0.01,
|
70
|
-
n_neighbors=11,
|
71
|
-
label_w=1,
|
72
|
-
rec_w=1,
|
73
|
-
n_comps=300,
|
74
|
-
weighted_adj=False,
|
75
|
-
nheads=3,
|
76
|
-
var=False,
|
77
|
-
convergence_threshold=1e-4,
|
78
|
-
hierarchically=False
|
79
|
-
threads:
|
80
|
-
3
|
81
|
-
benchmark: '{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad.benchmark'
|
82
|
-
resources:
|
83
|
-
mem_mb_per_cpu=lambda wildcards, threads, attempt: 20_000 * np.log2(attempt + 1),
|
84
|
-
qos=QOS
|
85
|
-
run:
|
86
|
-
command = f"""
|
87
|
-
gsmap run_find_latent_representations \
|
88
|
-
--input_hdf5_path {input.hdf5_path} \
|
89
|
-
--sample_name {wildcards.sample_name} \
|
90
|
-
--output_hdf5_path {output.hdf5_output} \
|
91
|
-
{ '--annotation ' + params.annotation if params.annotation is not None else ''} \
|
92
|
-
--type {params.type} \
|
93
|
-
--epochs {params.epochs} \
|
94
|
-
--feat_hidden1 {params.feat_hidden1} \
|
95
|
-
--feat_hidden2 {params.feat_hidden2} \
|
96
|
-
--feat_cell {params.feat_cell} \
|
97
|
-
--gcn_hidden1 {params.gcn_hidden1} \
|
98
|
-
--gcn_hidden2 {params.gcn_hidden2} \
|
99
|
-
--p_drop {params.p_drop} \
|
100
|
-
--gcn_lr {params.gcn_lr} \
|
101
|
-
--gcn_decay {params.gcn_decay} \
|
102
|
-
--n_neighbors {params.n_neighbors} \
|
103
|
-
--label_w {params.label_w} \
|
104
|
-
--rec_w {params.rec_w} \
|
105
|
-
--n_comps {params.n_comps} \
|
106
|
-
{'--weighted_adj' if params.weighted_adj else ''} \
|
107
|
-
--nheads {params.nheads} \
|
108
|
-
{'--var' if params.var else ''} \
|
109
|
-
--convergence_threshold {params.convergence_threshold} \
|
110
|
-
{'--hierarchically' if params.hierarchically else ''}
|
111
|
-
"""
|
112
|
-
shell(
|
113
|
-
f'{command}'
|
114
|
-
)
|
115
|
-
|
116
|
-
|
117
|
-
rule latent_to_gene:
|
118
|
-
input:
|
119
|
-
hdf5_with_latent_path=rules.find_latent_representations.output.hdf5_output
|
120
|
-
output:
|
121
|
-
feather_path='{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather'
|
122
|
-
params:
|
123
|
-
latent_representation="latent_GVAE",
|
124
|
-
num_neighbour=51,
|
125
|
-
num_neighbour_spatial=201,
|
126
|
-
species='MACAQUE_GENE_SYM',
|
127
|
-
gs_species='/storage/yangjianLab/songliyang/SpatialData/homologs/macaque_human_homologs.txt',
|
128
|
-
gM_slices=None,
|
129
|
-
annotation=get_annotation,
|
130
|
-
type=data_type
|
131
|
-
threads:
|
132
|
-
1
|
133
|
-
resources:
|
134
|
-
mem_mb_per_cpu=lambda wildcards, threads, attempt: 70_000 * np.log2(attempt + 1),
|
135
|
-
qos=QOS
|
136
|
-
benchmark: '{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather.benchmark'
|
137
|
-
run:
|
138
|
-
command = f"""
|
139
|
-
gsmap run_latent_to_gene \
|
140
|
-
--input_hdf5_with_latent_path {input.hdf5_with_latent_path} \
|
141
|
-
--sample_name {wildcards.sample_name} \
|
142
|
-
--output_feather_path {output.feather_path} \
|
143
|
-
{ '--annotation ' + params.annotation if params.annotation is not None else ''} \
|
144
|
-
--type {params.type} \
|
145
|
-
--latent_representation {params.latent_representation} \
|
146
|
-
--num_neighbour {params.num_neighbour} \
|
147
|
-
--num_neighbour_spatial {params.num_neighbour_spatial} \
|
148
|
-
{'--species ' + params.species if params.species is not None else ''} \
|
149
|
-
{'--gs_species ' + params.gs_species if params.gs_species is not None else ''} \
|
150
|
-
{'--gM_slices ' + params.gM_slices if params.gM_slices is not None else ''}
|
151
|
-
"""
|
152
|
-
shell(
|
153
|
-
f'{command}'
|
154
|
-
)
|
155
|
-
|
156
|
-
|
157
|
-
rule generate_ldscore:
|
158
|
-
input:
|
159
|
-
mkscore_feather_file=rules.latent_to_gene.output.feather_path
|
160
|
-
output:
|
161
|
-
done='{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done'
|
162
|
-
params:
|
163
|
-
ld_score_save_dir='{sample_name}/generate_ldscore',
|
164
|
-
gtf_annotation_file="/storage/yangjianLab/songliyang/ReferenceGenome/GRCh37/gencode.v39lift37.annotation.gtf",
|
165
|
-
bfile_root="/storage/yangjianLab/sharedata/LDSC_resource/1000G_EUR_Phase3_plink/1000G.EUR.QC",
|
166
|
-
keep_snp_root="/storage/yangjianLab/sharedata/LDSC_resource/hapmap3_snps/hm",
|
167
|
-
gene_window_size=50000,
|
168
|
-
enhancer_annotation_file=None,
|
169
|
-
snp_multiple_enhancer_strategy='max_mkscore',
|
170
|
-
gene_window_enhancer_priority=None,
|
171
|
-
spots_per_chunk=1000,
|
172
|
-
ld_wind=1,
|
173
|
-
ld_unit="CM",
|
174
|
-
additional_baseline_annotation_dir_path=None
|
175
|
-
# additional_baseline_annotation_dir_path='/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/resource/ldsc/baseline_v1.2/remove_base'
|
176
|
-
benchmark: '{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done.benchmark'
|
177
|
-
threads:
|
178
|
-
3
|
179
|
-
resources:
|
180
|
-
mem_mb_per_cpu=lambda wildcards, threads, attempt: 50_000 / threads * np.log2(attempt + 1),
|
181
|
-
qos=QOS
|
182
|
-
run:
|
183
|
-
command = f"""
|
184
|
-
gsmap run_generate_ldscore \
|
185
|
-
--sample_name {wildcards.sample_name} \
|
186
|
-
--chrom {wildcards.chrom} \
|
187
|
-
--ldscore_save_dir {params.ld_score_save_dir} \
|
188
|
-
--mkscore_feather_file {input.mkscore_feather_file} \
|
189
|
-
--bfile_root {params.bfile_root} \
|
190
|
-
--keep_snp_root {params.keep_snp_root} \
|
191
|
-
--gtf_annotation_file {params.gtf_annotation_file} \
|
192
|
-
--gene_window_size {params.gene_window_size} \
|
193
|
-
{'--enhancer_annotation_file ' + params.enhancer_annotation_file if params.enhancer_annotation_file is not None else ''} \
|
194
|
-
--snp_multiple_enhancer_strategy {params.snp_multiple_enhancer_strategy} \
|
195
|
-
{'--gene_window_enhancer_priority ' + params.gene_window_enhancer_priority if params.gene_window_enhancer_priority is not None else ''} \
|
196
|
-
--spots_per_chunk {params.spots_per_chunk} \
|
197
|
-
--ld_wind {params.ld_wind} \
|
198
|
-
--ld_unit {params.ld_unit} \
|
199
|
-
{ '--additional_baseline_annotation_dir_path ' + params.additional_baseline_annotation_dir_path if params.additional_baseline_annotation_dir_path is not None else '' }
|
200
|
-
"""
|
201
|
-
shell(command)
|
202
|
-
shell('touch {output.done}')
|
203
|
-
|
204
|
-
|
205
|
-
def get_h2_file(wildcards):
|
206
|
-
gwas_root = "/storage/yangjianLab/songliyang/GWAS_trait/LDSC"
|
207
|
-
return f"{gwas_root}/{wildcards.trait_name}.sumstats.gz",
|
208
|
-
|
209
|
-
|
210
|
-
def get_ldscore(wildcards):
|
211
|
-
if chrom == "all":
|
212
|
-
return f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{chrom}.done"
|
213
|
-
else:
|
214
|
-
assert tuple(chrom) == tuple(range(1,23)), "chrom must be all or range(1,23)"
|
215
|
-
return [f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{c}.done" for
|
216
|
-
c in chrom]
|
217
|
-
|
218
|
-
|
219
|
-
rule spatial_ldsc:
|
220
|
-
input:
|
221
|
-
# h2_file=get_h2_file,
|
222
|
-
generate_ldscore_done=get_ldscore
|
223
|
-
output:
|
224
|
-
done='{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done'
|
225
|
-
params:
|
226
|
-
ldscore_input_dir=rules.generate_ldscore.params.ld_score_save_dir,
|
227
|
-
ldsc_save_dir='{sample_name}/spatial_ldsc',
|
228
|
-
w_file="/storage/yangjianLab/sharedata/LDSC_resource/LDSC_SEG_ldscores/weights_hm3_no_hla/weights.",
|
229
|
-
sumstats_config_file='/storage/yangjianLab/chenwenhao/projects/202312_GPS/src/gsMap/example/sumstats_config_sub.yaml',
|
230
|
-
all_chunk = None
|
231
|
-
threads:
|
232
|
-
10
|
233
|
-
benchmark:
|
234
|
-
'{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done.benchmark'
|
235
|
-
resources:
|
236
|
-
mem_mb_per_cpu=lambda wildcards, threads, attempt: 40_000 / threads * np.log2(attempt + 1),
|
237
|
-
qos=QOS,
|
238
|
-
partition='intel-sc3,amd-ep2'
|
239
|
-
run:
|
240
|
-
command = f"""
|
241
|
-
gsmap run_spatial_ldsc --w_file {params.w_file} --sample_name {wildcards.sample_name} --num_processes {threads} --ldscore_input_dir {params.ldscore_input_dir} --ldsc_save_dir {params.ldsc_save_dir} --sumstats_config_file {params.sumstats_config_file} {f'--all_chunk {params.all_chunk}' if params.all_chunk else ''}
|
242
|
-
"""
|
243
|
-
shell(
|
244
|
-
f'{command}'
|
245
|
-
'touch {output.done}'
|
246
|
-
)
|
247
|
-
|
248
|
-
|
249
|
-
rule cauchy_combination:
|
250
|
-
output:
|
251
|
-
done='{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz'
|
252
|
-
input:
|
253
|
-
hdf5_path=rules.find_latent_representations.output.hdf5_output,
|
254
|
-
ldsc_done=rules.spatial_ldsc.output.done
|
255
|
-
params:
|
256
|
-
cauchy_save_dir='{sample_name}/cauchy_combination',
|
257
|
-
annotation=annotation,
|
258
|
-
ldsc_dir=rules.spatial_ldsc.params.ldsc_save_dir
|
259
|
-
benchmark:
|
260
|
-
'{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz.benchmark'
|
261
|
-
threads:
|
262
|
-
2
|
263
|
-
resources:
|
264
|
-
mem_mb_per_cpu=25_000
|
265
|
-
shell:
|
266
|
-
"""
|
267
|
-
gsmap run_cauchy_combination --input_hdf5_path {input.hdf5_path} --input_ldsc_dir {params.ldsc_dir} --sample_name {wildcards.sample_name} --output_cauchy_dir {params.cauchy_save_dir} --trait_name {wildcards.trait_name} --annotation {params.annotation}
|
268
|
-
"""
|
@@ -1,229 +0,0 @@
|
|
1
|
-
import numpy as np
|
2
|
-
|
3
|
-
workdir: '/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/GPS_test/Nature_Neuroscience_2021/snake_workdir'
|
4
|
-
sample_names = ["Cortex_151507"]
|
5
|
-
# chrom = "all"
|
6
|
-
|
7
|
-
chrom = range(1,23)
|
8
|
-
# trait_names=[
|
9
|
-
# 'ADULT1_ADULT2_ONSET_ASTHMA'
|
10
|
-
# ]
|
11
|
-
annotation= "layer_guess"
|
12
|
-
data_type = 'count'
|
13
|
-
rule all:
|
14
|
-
input:
|
15
|
-
expand('{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done', sample_name=sample_names)
|
16
|
-
# expand('{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz', trait_name=trait_names, sample_name=sample_names)
|
17
|
-
|
18
|
-
rule find_latent_representations:
|
19
|
-
input:
|
20
|
-
hdf5_path = "/storage/yangjianLab/songliyang/SpatialData/Data/Brain/Human/Nature_Neuroscience_2021/processed/h5ad/Cortex_151507.h5ad"
|
21
|
-
output:
|
22
|
-
hdf5_output='{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad'
|
23
|
-
params:
|
24
|
-
annotation=annotation,
|
25
|
-
type=data_type,
|
26
|
-
epochs=300,
|
27
|
-
feat_hidden1=256,
|
28
|
-
feat_hidden2=128,
|
29
|
-
feat_cell=3000,
|
30
|
-
gcn_hidden1=64,
|
31
|
-
gcn_hidden2=30,
|
32
|
-
p_drop=0.1,
|
33
|
-
gcn_lr=0.001,
|
34
|
-
gcn_decay=0.01,
|
35
|
-
n_neighbors=11,
|
36
|
-
label_w=1,
|
37
|
-
rec_w=1,
|
38
|
-
n_comps=300,
|
39
|
-
weighted_adj=False,
|
40
|
-
nheads=3,
|
41
|
-
var=False,
|
42
|
-
convergence_threshold=1e-4,
|
43
|
-
hierarchically=False
|
44
|
-
threads:
|
45
|
-
1
|
46
|
-
benchmark: '{sample_name}/find_latent_representations/{sample_name}_add_latent.h5ad.benchmark'
|
47
|
-
run:
|
48
|
-
command = f"""
|
49
|
-
gsmap run_find_latent_representations \
|
50
|
-
--input_hdf5_path {input.hdf5_path} \
|
51
|
-
--sample_name {wildcards.sample_name} \
|
52
|
-
--output_hdf5_path {output.hdf5_output} \
|
53
|
-
{ '--annotation ' + params.annotation if params.annotation is not None else ''} \
|
54
|
-
--type {params.type} \
|
55
|
-
--epochs {params.epochs} \
|
56
|
-
--feat_hidden1 {params.feat_hidden1} \
|
57
|
-
--feat_hidden2 {params.feat_hidden2} \
|
58
|
-
--feat_cell {params.feat_cell} \
|
59
|
-
--gcn_hidden1 {params.gcn_hidden1} \
|
60
|
-
--gcn_hidden2 {params.gcn_hidden2} \
|
61
|
-
--p_drop {params.p_drop} \
|
62
|
-
--gcn_lr {params.gcn_lr} \
|
63
|
-
--gcn_decay {params.gcn_decay} \
|
64
|
-
--n_neighbors {params.n_neighbors} \
|
65
|
-
--label_w {params.label_w} \
|
66
|
-
--rec_w {params.rec_w} \
|
67
|
-
--n_comps {params.n_comps} \
|
68
|
-
{'--weighted_adj' if params.weighted_adj else ''} \
|
69
|
-
--nheads {params.nheads} \
|
70
|
-
{'--var' if params.var else ''} \
|
71
|
-
--convergence_threshold {params.convergence_threshold} \
|
72
|
-
{'--hierarchically' if params.hierarchically else ''}
|
73
|
-
"""
|
74
|
-
shell(
|
75
|
-
f'{command}'
|
76
|
-
)
|
77
|
-
|
78
|
-
|
79
|
-
rule latent_to_gene:
|
80
|
-
input:
|
81
|
-
hdf5_with_latent_path=rules.find_latent_representations.output.hdf5_output
|
82
|
-
output:
|
83
|
-
feather_path='{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather'
|
84
|
-
params:
|
85
|
-
latent_representation="latent_GVAE",
|
86
|
-
num_neighbour=51,
|
87
|
-
num_neighbour_spatial=201,
|
88
|
-
species=None,
|
89
|
-
gs_species=None,
|
90
|
-
gM_slices=None,
|
91
|
-
annotation=annotation,
|
92
|
-
type=data_type
|
93
|
-
threads:
|
94
|
-
1
|
95
|
-
resources:
|
96
|
-
mem_mb_per_cpu=lambda wildcards, threads, attempt: 70_000 * np.log2(attempt + 1),
|
97
|
-
qos='huge'
|
98
|
-
benchmark: '{sample_name}/latent_to_gene/{sample_name}_gene_marker_score.feather.benchmark'
|
99
|
-
run:
|
100
|
-
command = f"""
|
101
|
-
gsmap run_latent_to_gene \
|
102
|
-
--input_hdf5_with_latent_path {input.hdf5_with_latent_path} \
|
103
|
-
--sample_name {wildcards.sample_name} \
|
104
|
-
--output_feather_path {output.feather_path} \
|
105
|
-
{ '--annotation ' + params.annotation if params.annotation is not None else ''} \
|
106
|
-
--type {params.type} \
|
107
|
-
--latent_representation {params.latent_representation} \
|
108
|
-
--num_neighbour {params.num_neighbour} \
|
109
|
-
--num_neighbour_spatial {params.num_neighbour_spatial} \
|
110
|
-
{'--species ' + params.species if params.species is not None else ''} \
|
111
|
-
{'--gs_species ' + params.gs_species if params.gs_species is not None else ''} \
|
112
|
-
{'--gM_slices ' + params.gM_slices if params.gM_slices is not None else ''}
|
113
|
-
"""
|
114
|
-
shell(
|
115
|
-
f'{command}'
|
116
|
-
)
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
rule generate_ldscore:
|
121
|
-
input:
|
122
|
-
mkscore_feather_file=rules.latent_to_gene.output.feather_path
|
123
|
-
output:
|
124
|
-
done='{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done'
|
125
|
-
params:
|
126
|
-
ld_score_save_dir='{sample_name}/generate_ldscore',
|
127
|
-
gtf_annotation_file="/storage/yangjianLab/songliyang/ReferenceGenome/GRCh37/gencode.v39lift37.annotation.gtf",
|
128
|
-
bfile_root="/storage/yangjianLab/sharedata/LDSC_resource/1000G_EUR_Phase3_plink/1000G.EUR.QC",
|
129
|
-
keep_snp_root="/storage/yangjianLab/sharedata/LDSC_resource/hapmap3_snps/hm",
|
130
|
-
gene_window_size=50000,
|
131
|
-
enhancer_annotation_file=None,
|
132
|
-
snp_multiple_enhancer_strategy='max_mkscore',
|
133
|
-
gene_window_enhancer_priority=None,
|
134
|
-
spots_per_chunk=5000,
|
135
|
-
ld_wind=1,
|
136
|
-
ld_unit="CM",
|
137
|
-
additional_baseline_annotation_dir_path='/storage/yangjianLab/chenwenhao/projects/202312_GPS/data/resource/ldsc/baseline_v1.2/remove_base'
|
138
|
-
benchmark: '{sample_name}/generate_ldscore/{sample_name}_generate_ldscore_chr{chrom}.done.benchmark'
|
139
|
-
threads:
|
140
|
-
3
|
141
|
-
resources:
|
142
|
-
mem_mb_per_cpu=lambda wildcards, threads, attempt: 45_000 / threads * np.log2(attempt + 1),
|
143
|
-
qos='huge'
|
144
|
-
run:
|
145
|
-
command = f"""
|
146
|
-
gsmap run_generate_ldscore \
|
147
|
-
--sample_name {wildcards.sample_name} \
|
148
|
-
--chrom {wildcards.chrom} \
|
149
|
-
--ldscore_save_dir {params.ld_score_save_dir} \
|
150
|
-
--mkscore_feather_file {input.mkscore_feather_file} \
|
151
|
-
--bfile_root {params.bfile_root} \
|
152
|
-
--keep_snp_root {params.keep_snp_root} \
|
153
|
-
--gtf_annotation_file {params.gtf_annotation_file} \
|
154
|
-
--gene_window_size {params.gene_window_size} \
|
155
|
-
{'--enhancer_annotation_file ' + params.enhancer_annotation_file if params.enhancer_annotation_file is not None else ''} \
|
156
|
-
--snp_multiple_enhancer_strategy {params.snp_multiple_enhancer_strategy} \
|
157
|
-
{'--gene_window_enhancer_priority ' + params.gene_window_enhancer_priority if params.gene_window_enhancer_priority is not None else ''} \
|
158
|
-
--spots_per_chunk {params.spots_per_chunk} \
|
159
|
-
--ld_wind {params.ld_wind} \
|
160
|
-
--ld_unit {params.ld_unit} \
|
161
|
-
{ '--additional_baseline_annotation_dir_path' + params.additional_baseline_annotation_dir_path if params.additional_baseline_annotation_dir_path is not None else '' }
|
162
|
-
"""
|
163
|
-
shell(command)
|
164
|
-
shell('touch {output.done}')
|
165
|
-
|
166
|
-
|
167
|
-
def get_h2_file(wildcards):
|
168
|
-
gwas_root = "/storage/yangjianLab/songliyang/GWAS_trait/LDSC"
|
169
|
-
return f"{gwas_root}/{wildcards.trait_name}.sumstats.gz",
|
170
|
-
|
171
|
-
|
172
|
-
def get_ldscore(wildcards):
|
173
|
-
if chrom == "all":
|
174
|
-
return f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{chrom}.done"
|
175
|
-
else:
|
176
|
-
assert tuple(chrom) == tuple(range(1,23)), "chrom must be all or range(1,23)"
|
177
|
-
return [f"{wildcards.sample_name}/generate_ldscore/{wildcards.sample_name}_generate_ldscore_chr{c}.done" for
|
178
|
-
c in chrom]
|
179
|
-
|
180
|
-
|
181
|
-
rule spatial_ldsc:
|
182
|
-
input:
|
183
|
-
# h2_file=get_h2_file,
|
184
|
-
generate_ldscore_done=get_ldscore
|
185
|
-
output:
|
186
|
-
done='{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done'
|
187
|
-
params:
|
188
|
-
ldscore_input_dir=rules.generate_ldscore.params.ld_score_save_dir,
|
189
|
-
ldsc_save_dir='{sample_name}/spatial_ldsc',
|
190
|
-
w_file="/storage/yangjianLab/sharedata/LDSC_resource/LDSC_SEG_ldscores/weights_hm3_no_hla/weights.",
|
191
|
-
sumstats_config_file='/storage/yangjianLab/chenwenhao/projects/202312_GPS/src/gsMap/example/sumstats_config_sub.yaml',
|
192
|
-
all_chunk = None
|
193
|
-
threads:
|
194
|
-
2
|
195
|
-
benchmark:
|
196
|
-
'{sample_name}/spatial_ldsc/{sample_name}.spatial_ldsc.done.benchmark'
|
197
|
-
resources:
|
198
|
-
mem_mb_per_cpu=lambda wildcards, threads, attempt: 60_000 / threads * np.log2(attempt + 1),
|
199
|
-
qos='huge'
|
200
|
-
run:
|
201
|
-
command = f"""
|
202
|
-
gsmap run_spatial_ldsc --w_file {params.w_file} --sample_name {wildcards.sample_name} --num_processes {threads} --ldscore_input_dir {params.ldscore_input_dir} --ldsc_save_dir {params.ldsc_save_dir} --sumstats_config_file {params.sumstats_config_file} {f'--all_chunk {params.all_chunk}' if params.all_chunk else ''}
|
203
|
-
"""
|
204
|
-
shell(
|
205
|
-
f'{command}'
|
206
|
-
'touch {output.done}'
|
207
|
-
)
|
208
|
-
|
209
|
-
|
210
|
-
rule cauchy_combination:
|
211
|
-
output:
|
212
|
-
done='{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz'
|
213
|
-
input:
|
214
|
-
hdf5_path=rules.find_latent_representations.output.hdf5_output,
|
215
|
-
ldsc_done=rules.spatial_ldsc.output.done
|
216
|
-
params:
|
217
|
-
cauchy_save_dir='{sample_name}/cauchy_combination',
|
218
|
-
annotation=annotation,
|
219
|
-
ldsc_dir=rules.spatial_ldsc.params.ldsc_save_dir
|
220
|
-
benchmark:
|
221
|
-
'{sample_name}/cauchy_combination/{sample_name}_{trait_name}.Cauchy.csv.gz.benchmark'
|
222
|
-
threads:
|
223
|
-
2
|
224
|
-
resources:
|
225
|
-
mem_mb_per_cpu=25_000
|
226
|
-
shell:
|
227
|
-
"""
|
228
|
-
gsmap run_cauchy_combination --input_hdf5_path {input.hdf5_path} --input_ldsc_dir {params.ldsc_dir} --sample_name {wildcards.sample_name} --output_cauchy_dir {params.cauchy_save_dir} --trait_name {wildcards.trait_name} --annotation {params.annotation}
|
229
|
-
"""
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|